diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATenGeneral.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATenGeneral.h new file mode 100644 index 0000000000000000000000000000000000000000..0085e1ea934f4ee29d9f4d6a20bd6f56d8eeac60 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATenGeneral.h @@ -0,0 +1,8 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATenOpList.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATenOpList.h new file mode 100644 index 0000000000000000000000000000000000000000..44d0a3ae4365b7d938a46f4704ce11bd41e46d7d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATenOpList.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10 { +struct OperatorName; +} + +namespace at { + +// check if an op is a custom op (i.e. did not come from native_functions.yaml) +TORCH_API bool is_custom_op(const c10::OperatorName& opName); +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATen_fwd.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATen_fwd.h new file mode 100644 index 0000000000000000000000000000000000000000..68d4b7e2e14f5851a790961d049406a6b3d6940c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATen_fwd.h @@ -0,0 +1,51 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +// Forward declarations of core ATen types used in dispatch functions +namespace c10 { + +template +class List; +template +class IListRef; +class Stream; +class Scalar; +class SymInt; +class SymIntList; +struct Storage; +struct TensorOptions; +template +class ArrayRef; +template +class OptionalArrayRef; + +} // namespace c10 + +namespace at { + +class Tensor; +class OptionalTensorRef; +struct Dimname; +struct Generator; +using TensorList = c10::ArrayRef; +using ITensorListRef = c10::IListRef; +using IOptTensorListRef = c10::IListRef; +using DimnameList = c10::ArrayRef; +using IntArrayRef = c10::ArrayRef; +using OptionalIntArrayRef = c10::OptionalArrayRef; +using OptionalSymIntArrayRef = c10::OptionalArrayRef; + +using c10::Stream; +using c10::Storage; +using c10::QScheme; +using c10::Scalar; +using c10::SymInt; +using c10::SymIntList; +using c10::TensorOptions; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATen_pch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATen_pch.h new file mode 100644 index 0000000000000000000000000000000000000000..0e7728b4125f4dd36bcec4d0d910bb3e28e3ec42 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ATen_pch.h @@ -0,0 +1,166 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// This global header must not depend on native_functions.yaml or +// incremental builds will be next to useless +#pragma push_macro("TORCH_ASSERT_NO_OPERATORS") +#define TORCH_ASSERT_NO_OPERATORS + +#include + +// This list of headers was generated using a script that finds +// high-impact headers and then manually tweaked to remove OS specific +// or duplicate headers (e.g. and ) and to remove +// "impl" headers (e.g BFloat16-inl.h or complex_math.h in c10). + +// To generate the initial list: +// 1. Build pytorch from scratch with all build caching disabled +// 2. Generate a build trace with ninjatracing (https://github.com/nico/ninjatracing) +// $ ninjatracing /path/to/pytorch/build/.ninja_log > trace_all.json +// 3. Run pch_gen.py from https://github.com/peterbell10/build_analysis/ +// $ python pch_gen.py --threshold .80 --target torch_cpu --build_dir /path/to/pytorch/build --trace trace_all.json +// Where the threshold can be tweaked until c10 and some of ATen +// core are included but TORCH_ASSERT_NO_OPERATORS still passes. + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma pop_macro("TORCH_ASSERT_NO_OPERATORS") + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Array.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Array.h new file mode 100644 index 0000000000000000000000000000000000000000..16e370f826e940e770461d7e6c6ca1e8017ec7fd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Array.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// A fixed-size array type usable from both host and +// device code. + +#include +#include + +namespace at::detail { + +template +struct Array { + // NOLINTNEXTLINE(*c-array*) + T data[size_]; + + C10_HOST_DEVICE T operator[](int i) const { + return data[i]; + } + C10_HOST_DEVICE T& operator[](int i) { + return data[i]; + } +#if defined(USE_ROCM) + C10_HOST_DEVICE Array() = default; + C10_HOST_DEVICE Array(const Array&) = default; + C10_HOST_DEVICE Array& operator=(const Array&) = default; + C10_HOST_DEVICE Array(Array&&) = default; + C10_HOST_DEVICE Array& operator=(Array&&) = default; + C10_HOST_DEVICE ~Array() = default; +#else + Array() = default; + Array(const Array&) = default; + Array& operator=(const Array&) = default; + Array(Array&&) noexcept = default; + Array& operator=(Array&&) noexcept = default; + ~Array() = default; +#endif + static constexpr int size() { + return size_; + } + // Fill the array with x. + C10_HOST_DEVICE Array(T x) { + for (int i = 0; i < size_; i++) { + data[i] = x; + } + } +}; + +} // namespace at::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Backtrace.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Backtrace.h new file mode 100644 index 0000000000000000000000000000000000000000..ae30d57c820c35c3d00aa8d6390e79d3497721ad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Backtrace.h @@ -0,0 +1,7 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/CachingHostAllocator.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/CachingHostAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..597e2b3720e30dc92d5ce2dfb14c51aa6778c824 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/CachingHostAllocator.h @@ -0,0 +1,800 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") +namespace at { + +using c10::CachingAllocator::Stat; +using c10::CachingAllocator::DurationStat; + +/** + * HostBlock is typically a fundamental memory block used in pinned memory. It + * is likely related to Event and Stream of device runtime. It is probably a + * base struct or interface that can be inherited and extended by each backend. + */ +template +struct HostBlock { + // constructor for search key + HostBlock(size_t size) : size_(size) {} + + HostBlock(size_t size, void* ptr) : size_(size), ptr_(ptr) {} + + std::mutex mutex_; + size_t size_{0}; // block size in bytes + void* ptr_{nullptr}; // memory address + bool allocated_{false}; // in-use flag + size_t event_count_{0}; // number of related events + ska::flat_hash_set streams_; // streams on which the block was used +}; + +template +struct alignas(hardware_destructive_interference_size) FreeBlockList { + std::mutex mutex_; + std::deque list_; +}; + +namespace { + // Max cached block sizes: (1 << MAX_SIZE_INDEX) bytes + // NOLINTNEXTLINE(misc-definitions-in-headers) + constexpr size_t MAX_SIZE_INDEX = 64; +} + +// A large reserved pinned memory segment that is created in advance which is used +// to allocate small pinned memory requests to avoid calling into expensive APIs. +// We never free this memory and move up the pointer as we allocate new blocks +// and when blocks are freed, they are cached in the free lists. +struct PinnedReserveSegment { + PinnedReserveSegment(void *start, size_t size) : start_(start), size_(size), + current_ptr_(start_), initialized_(true) {} + + PinnedReserveSegment() : start_(nullptr), size_(0), current_ptr_(nullptr), initialized_(false) {} + + bool initialized() { + return initialized_; + } + + void* allocate(size_t bytes) { + std::lock_guard guard(mutex_); + + // Round up the requested size to 4KB boundary for all including the small ones. + size_t rounded_bytes = (bytes + 4096 - 1) & ~(4096 - 1); + + if (((uint8_t*)current_ptr_ + rounded_bytes) > ((uint8_t*)start_ + size_)) { + return nullptr; + } + + void* ptr = current_ptr_; + current_ptr_ = (uint8_t*)current_ptr_ + rounded_bytes; + return ptr; + } + + bool owns(void* ptr) { + return ptr >= start_ && ptr < (uint8_t*)start_ + size_; + } + + std::mutex mutex_; + void* start_; + size_t size_; + void* current_ptr_; + bool initialized_; +}; + +// Struct containing memory allocator summary statistics for host. +struct TORCH_API HostStats { + // COUNT: total allocations (active) + Stat active_requests; + // SUM: bytes allocated/reserved by this memory allocator. (active) + Stat active_bytes; + // COUNT: total allocations (active + free) + Stat allocations; + // SUM: bytes allocated/reserved by this memory allocator. This accounts + // for both free and in-use blocks. + Stat allocated_bytes; + + // SUM: time spent in cudaHostAlloc/cudaHostRegister in microseconds + DurationStat host_alloc_time; + + // SUM: time spent in cudaHostFree/cudaHostUnregister in microseconds + DurationStat host_free_time; + + // COUNT: number of times cudaHostAlloc/cudaHostRegister was called because + // the request could not be satisfied from existing free blocks. + int64_t num_host_alloc = 0; // This is derived from segment or timing + + // COUNT: number of times cudaHostFree/cudaHostUnregister was called. + int64_t num_host_free = 0; // This is derived from segment or timing + + // Count of cudaHostAlloc/cudaHostRegister per bucket + std::vector bucket_allocation = std::vector(MAX_SIZE_INDEX); +}; + +// Struct containing memory allocator summary statistics for host, as they +// are staged for reporting. This is a temporary struct that is used to +// avoid locking the allocator while collecting stats. +struct alignas(hardware_destructive_interference_size) HostStatsStaged { + std::mutex timing_mutex_; + // COUNT: total allocations (active + free) + // LOCK: access to this stat is protected by the allocator's blocks_mutex_ + Stat allocations; + // SUM: bytes allocated/reserved by this memory allocator. This accounts + // for both free and in-use blocks. + Stat allocated_bytes; + // COUNT: number of allocations per bucket (active) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector active_bucket_stats = std::vector(MAX_SIZE_INDEX); + // SUM: bytes of allocation per bucket (active) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector active_bytes_bucket_stats = std::vector(MAX_SIZE_INDEX); + // COUNT: number of allocations per bucket (active + free) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector allocation_bucket_stats = std::vector(MAX_SIZE_INDEX); + // SUM: bytes of allocation per bucket (active + free) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector allocated_bytes_bucket_stats = std::vector(MAX_SIZE_INDEX); + // SUM: time spent in cudaHostAlloc/cudaHostRegister + // LOCK: access to this stat is protected by the timing_mutex_ + DurationStat host_alloc_time; + // SUM: time spent in cudaHostFree/cudaHostUnregister + // LOCK: access to this stat is protected by the timing_mutex_ + DurationStat host_free_time; +}; + +/** + * Note [HostAllocator design] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * We have three key data structures - the free list which stores blocks that + * are not currently used, the block list which stores all blocks that have been + * allocated, and the event queue which stores runtime events and their + * corresponding blocks. + * + * Each of these are protected by a separate mutex. The key design principles + * are to 1) only hold each mutex for the minimal amount of time possible, 2) + * never do any possible expensive operations (such as CUDA runtime API calls) + * while holding the lock. + * + * There are four public methods: allocate, free, record_event and empty_cache. + * 1) In the allocate path, we first check to see if we can service our + * request from this free list, and otherwise we create a new block with + * allocate_host_memory. + * 2) In the free path, we insert events (if required) into the event queue, + * and if possible insert our block back into the free list. In allocate, we + * first eagerly query events until we find one that is not ready, and insert + * the corresponding block onto the free list if all the events recorded for a + * block are ready. + * 3) In the record_event path, we simply insert the given stream into the set + * of streams tracked by the specified block. This set of streams is then + * consumed in the free path. + * 4) In the empty_cache path, we flush any available blocks into the free + * list. Remove all element of free list, then remove them from block list and + * release the associated pinned memory allocation via free_block. + * + * We generalize the caching host allocator into two parts: interface and + * implementation. For any new backend looking to integrate with host allocator + * and reuse caching mechanism, these two parts are necessary to be specialized. + * + * For the implementation, we provide a CachingHostAllocatorImpl struct + * to abstract the caching mechanism. Any backend needs to provide a customized + * implementation by specializing its own public functions and the related + * runtime functions. Its template parameter S represents runtime Stream, E + * denotes runtime Event, B indicates the fundamental memory block. + * + * For the interface, we provide a CachingHostAllocatorInterface struct as an + * interface. Any backend needs to derive its own host allocator from this + * interface. Its template parameter T refers to an implementation that + * inherited from CachingHostAllocatorImpl. + * + * So this design can share the caching mechanism across each backend, and + * provide flexibility to each backend. A backend can choose to follow this + * implementation or reuse them by extending and overriding them as necessary. + * Taking CUDA as an example, it specializes runtime related functions to reuse + * the caching mechanism. Additionally, it extends the allocator's functionality + * by adding the allocWithCudaHostRegister function to support page-locking the + * memory range used by CUDA. Of course, you can also refer to + * XPUCachingHostAllocator, which is a host caching allocator supported on XPU + * backend, to implement a basic host caching allocator. + * + * Some of the invariants here are less strict than they could be - for example, + * we do not enforce that free(Block* block) => block->event_count == 0. This is + * for compatibility reasons, and we can explore enforcing these in subsequent + * versions. + * + * Note that this caching host allocator does not split larger allocations into + * smaller blocks, unlike the caching device allocator. + * + * In order to gather statistics about caching host allocator while minimally + * impacting performance, we use a HostStatsStaged struct to stage the stats + * before reporting them. This is done to avoid adding new locks to the allocator. + * Collecting stats is carefully done under existing locks, and then the staged + * stats are converted to the final stats when getStats is called. At that time + * we hold the same locks as empty_cache, to ensure the fidelity of the stats. + */ + +template < + typename S, + typename E, + typename B = HostBlock> +struct CachingHostAllocatorImpl { + virtual ~CachingHostAllocatorImpl() { + if (active_) { + active_ = false; + getBackgroundThreadPool()->waitWorkComplete(); + } + } + + public: + // return data_ptr and block pair. + virtual std::pair allocate(size_t size) { + if (size == 0) { + return {nullptr, nullptr}; + } + + // If we are using background threads, we can process events in the + // background. + if (!pinned_use_background_threads()) { + process_events(); + } + + // Round up the allocation to the nearest power of two to improve reuse. + // These power of two sizes are also used to index into the free list. + size_t roundSize = c10::llvm::PowerOf2Ceil(size); + + // First, try to allocate from the free list + auto* block = get_free_block(roundSize); + if (block) { + return {block->ptr_, reinterpret_cast(block)}; + } + + // Check in the recently freed blocks with pending events to see if we + // can reuse them. Call get_free_block again after processing events + if (pinned_use_background_threads()) { + // Launch the background thread and process events in a loop. + static bool background_thread_flag [[maybe_unused]] = [this] { + active_ = true; + getBackgroundThreadPool()->run([&]() { + while (active_) { + process_events(); + std::this_thread::sleep_for(std::chrono::microseconds(100)); + } + }); + return true; + }(); + } + + // Slow path: if we can't allocate from the cached free list, we need + // to create a new block. + void* ptr = nullptr; + allocate_host_memory(roundSize, &ptr); + + // Then, create a new block. + block = new B(roundSize, ptr); + block->allocated_ = true; + + add_allocated_block(block); + return {block->ptr_, reinterpret_cast(block)}; + } + + virtual void free(void* ctx) { + if (!ctx) { + return; + } + + // Note: we can assume that free is correctly paired with alloc, and thus we + // do not need to look up the ctx in blocks_. + auto* block = reinterpret_cast(ctx); + + std::optional> events; + ska::flat_hash_set streams; + { + std::lock_guard g(block->mutex_); + block->allocated_ = false; + if (block->streams_.empty()) { + TORCH_INTERNAL_ASSERT(block->event_count_ == 0); + } else { + events = std::vector(); + events->reserve(block->streams_.size()); + block->event_count_ += block->streams_.size(); + // Move out streams to avoid holding the mutex during event recording + streams = std::move(block->streams_); + block->streams_.clear(); + } + } + + // Event recording must be done outside the mutex to avoid potential + // deadlocks (e.g., when Python GIL is involved) + for (auto stream : streams) { + record_stream(events, stream); + } + + if (!events) { + auto index = size_index(block->size_); + std::lock_guard g(free_list_[index].mutex_); + free_list_[index].list_.push_back(block); + } else { + // restore these events that record by used streams. + std::lock_guard g(events_mutex_); + for (auto&& event : *events) { + events_.emplace_front(std::move(event), block); + } + } + } + + virtual bool record_event(void* ptr, void* ctx, c10::Stream s) { + S stream = S(s); + auto* block = reinterpret_cast(ctx); + + // Note: we need to check if the passed-in `ctx` is valid. This is because + // `record_event` (via `CachingHostAllocator_recordEvent`) can be invoked on + // an arbitrary tensor, and is not guaranteed to correspond to a pinned + // memory allocation. Therefore, we need to check that `ctx` is valid before + // proceeding. + { + std::lock_guard g(blocks_mutex_); + if (blocks_.find(block) != blocks_.end()) { + // Now we know this object is safe to access. + std::lock_guard gb(block->mutex_); + TORCH_INTERNAL_ASSERT(block->allocated_); + block->streams_.insert(stream); + return true; + } + auto it = ptr_to_block_.find(ptr); + if (it != ptr_to_block_.end()) { + block = it->second; + std::lock_guard g(block->mutex_); + TORCH_INTERNAL_ASSERT(block->allocated_); + block->streams_.insert(stream); + return true; + } + } + + return false; + } + + virtual void empty_cache() { + // Flush any available blocks into the free_list. + process_events(); + + // Remove all elements from the free list, remove them from the blocks + // list, and free the associated pinned memory allocation. This requires + // concurrently holding both the free list mutexes and the blocks mutex, and + // is the only function that concurrently holds multiple mutexes. + for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + std::vector blocks_to_remove(free_list_[i].list_.begin(), free_list_[i].list_.end()); + free_list_[i].list_.clear(); + + for (auto* block : blocks_to_remove) { + blocks_.erase(block); + ptr_to_block_.erase(block->ptr_); + auto index = size_index(block->size_); + free_block(block); + stats_.allocations.decrease(1); + stats_.allocated_bytes.decrease(block->size_); + stats_.allocation_bucket_stats[index].decrease(1); + stats_.allocated_bytes_bucket_stats[index].decrease(block->size_); + delete block; + } + } + } + + inline size_t size_index(size_t size) { + return c10::llvm::Log2_64_Ceil(size); + } + + virtual bool pinned_use_background_threads() { + return c10::CachingAllocator::AcceleratorAllocatorConfig:: + pinned_use_background_threads(); + } + + virtual void copy_data(void* dest [[maybe_unused]], const void* src [[maybe_unused]], std::size_t count [[maybe_unused]]) const { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for copy_data"); + } + + HostStats getStats() { + HostStats stats; + + // To keep getStats lightweight we do *not* flush any available blocks + // into the free_list. This may skew the stats a bit. + + auto add_bucket_stats = [](Stat& accumulator, const Stat& other) { + accumulator.allocated += other.allocated; + accumulator.current += other.current; + accumulator.freed += other.freed; + // Since peaks are measured per bucket independently, we add them up + // to estimate the total peak. This is not strictly correct, but it is + // the best approximation we can get after the fact. + accumulator.peak += other.peak; + }; + + // Accurate reading of memory stats requires concurrently holding both the + // free list mutexes and the blocks mutex. Previously, this was only done in + // empty_cache function. + for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + // We collect the slow-path stats only once, since they are not collected + // per bucket (we pick index 0 arbitrarily). These are also all the host + // allocations, not taking into account caching and free lists. + if (i == 0) { + stats.allocations = stats_.allocations; + stats.allocated_bytes = stats_.allocated_bytes; + stats.num_host_alloc = stats.allocations.allocated; + stats.num_host_free = stats.allocations.freed; + } + + // Bucket stats need to be merged with the slow-path stats. We do this in + // a best effort manner, since we can't really replay the cached events per bucket. + add_bucket_stats(stats.active_requests, stats_.active_bucket_stats[i]); + add_bucket_stats(stats.active_bytes, stats_.active_bytes_bucket_stats[i]); + stats.bucket_allocation[i] = stats_.allocation_bucket_stats[i].allocated; + } + + // Get the timing stats + { + std::lock_guard g(stats_.timing_mutex_); + + stats.host_alloc_time = stats_.host_alloc_time; + stats.host_free_time = stats_.host_free_time; + } + + return stats; + } + + void resetAccumulatedStats() { + // Resetting accumulated memory stats requires concurrently holding both the + // free list mutexes and the blocks mutex. Previously, this was only done in + // empty_cache function. + for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + if (i == 0) { + stats_.allocations.reset_accumulated(); + stats_.allocated_bytes.reset_accumulated(); + } + stats_.active_bucket_stats[i].reset_accumulated(); + stats_.active_bytes_bucket_stats[i].reset_accumulated(); + stats_.allocation_bucket_stats[i].reset_accumulated(); + stats_.allocated_bytes_bucket_stats[i].reset_accumulated(); + } + + // Also reset timing stats + { + std::lock_guard g(stats_.timing_mutex_); + stats_.host_alloc_time.reset_accumulated(); + stats_.host_free_time.reset_accumulated(); + } + } + + void resetPeakStats() { + // Resetting peak memory stats requires concurrently holding both the + // free list mutexes and the blocks mutex. Previously, this was only done in + // empty_cache function. + for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + if (i == 0) { + stats_.allocations.reset_peak(); + stats_.allocated_bytes.reset_peak(); + } + stats_.active_bucket_stats[i].reset_peak(); + stats_.active_bytes_bucket_stats[i].reset_peak(); + stats_.allocation_bucket_stats[i].reset_peak(); + stats_.allocated_bytes_bucket_stats[i].reset_peak(); + } + + // Also reset timing stats + { + std::lock_guard g(stats_.timing_mutex_); + stats_.host_alloc_time.reset_peak(); + stats_.host_free_time.reset_peak(); + } + } + + private: + virtual void add_allocated_block(B* block) { + std::lock_guard g(blocks_mutex_); + blocks_.insert(block); + stats_.allocations.increase(1); + stats_.allocated_bytes.increase(block->size_); + ptr_to_block_.insert({block->ptr_, block}); + + // Unfortunately, we have to, on the slow path, quickly + // lock the bucket to record the allocation. This should + // be a rare event once the cache is warmed up. + auto size = block->size_; + auto index = size_index(size); + { + std::lock_guard g(free_list_[index].mutex_); + stats_.allocation_bucket_stats[index].increase(1); + stats_.allocated_bytes_bucket_stats[index].increase(size); + stats_.active_bucket_stats[index].increase(1); + stats_.active_bytes_bucket_stats[index].increase(size); + } + } + + virtual B* get_free_block(size_t size) { + auto index = size_index(size); + std::lock_guard g(free_list_[index].mutex_); + if (!free_list_[index].list_.empty()) { + B* block = free_list_[index].list_.back(); + free_list_[index].list_.pop_back(); + block->allocated_ = true; + stats_.active_bucket_stats[index].increase(1); + stats_.active_bytes_bucket_stats[index].increase(size); + return block; + } + return nullptr; + } + + virtual void process_events() { + // process all events until the last unready event, not for specific size. + process_events_for_specific_size(-1); + } + + // If size is -1, process all events from backwards until the last unready + // event. Otherwise, process events for a specific size and on first ready block + // is found, add it to the free list and return. + virtual void process_events_for_specific_size(int64_t size) { + size_t event_count = 0; + size_t max_events = 0; + { + std::lock_guard g(events_mutex_); + max_events = events_.size(); + } + + while (true) { + // Avoid calling cudaEventDestroy while holding a mutex, so move + // intermediate events out of the lock into this object. + // process the last event + std::optional> processed; + { + std::lock_guard g(events_mutex_); + if (!events_.empty()) { + processed = std::move(events_.back()); + events_.pop_back(); + } + } + + if (!processed) { + return; + } + + if (size != -1) { + if (event_count++ > max_events) { + { + std::lock_guard g(events_mutex_); + events_.push_front(std::move(*processed)); + } + return; + } + if (size != (int64_t)processed->second->size_) { + // if we are processing a specific size, and the size of the block + // doesn't match, we can't use it. + { + std::lock_guard g(events_mutex_); + events_.push_front(std::move(*processed)); + } + continue; + } + } + + // otherwise, query the event + { + // now, see if we can handle this element + auto& event = processed->first; + if (!query_event(event)) { + // push the event onto the back if it's not ready. + { + std::lock_guard g(events_mutex_); + if (size == -1) { + events_.push_back(std::move(*processed)); + return; + } else { + events_.push_front(std::move(*processed)); + continue; + } + } + } + } + + // Process the events. + TORCH_INTERNAL_ASSERT(processed); + auto* block = processed->second; + bool available = false; + { + std::lock_guard g(block->mutex_); + TORCH_INTERNAL_ASSERT(!block->allocated_) + block->event_count_--; + if (block->event_count_ == 0) { + available = true; + } + } + + if (available) { + auto index = size_index(block->size_); + std::lock_guard g(free_list_[index].mutex_); + free_list_[index].list_.push_back(block); + stats_.active_bucket_stats[index].decrease(1); + stats_.active_bytes_bucket_stats[index].decrease(size); + if (size != -1) { + return; + } + } + } + } + + TaskThreadPool* getBackgroundThreadPool() { + static TaskThreadPool* pool = new TaskThreadPool(1); + return pool; + } + + /* These following functions are runtime-related. */ + + // Allocate page-locked memory on the host. + virtual void allocate_host_memory(size_t size, void** ptr) { + TORCH_CHECK_NOT_IMPLEMENTED( + false, "Not implemented for allocate_host_memory"); + } + + // Free block and release the pointer contained in block. + virtual void free_block(B* block) { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for free_block"); + } + + // Record an event on stream and store event into events. + virtual void record_stream(std::optional>& events, S stream) { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for record_stream"); + } + + // Query event if it is completed. + virtual bool query_event(E& event) { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for query_event"); + } + + alignas(hardware_destructive_interference_size) std::mutex blocks_mutex_; + ska::flat_hash_set blocks_; // block list + ska::flat_hash_map ptr_to_block_; + + // We keep free list as a vector of free lists, one for each power of two + // size. This allows us to quickly find a free block of the right size. + // We use deque to store per size free list and guard the list with its own + // mutex. + alignas(hardware_destructive_interference_size) std::vector> + free_list_{MAX_SIZE_INDEX}; + + alignas(hardware_destructive_interference_size) std::mutex events_mutex_; + std::deque> events_; // event queue paired with block + + // Indicates whether the event-processing thread pool is active. + // Set to false in the destructor to signal background threads to stop. + std::atomic active_{false}; +protected: + alignas(hardware_destructive_interference_size) HostStatsStaged stats_; +}; + +struct TORCH_API HostAllocator : public at::Allocator { + // Associates the pinned memory allocation with a stream to track + // dependencies. This ensures the memory won't be reused until the stream's + // operations complete + virtual bool record_event(void* ptr, void* ctx, c10::Stream stream) = 0; + + // Frees all cached pinned memory and returns it to the system, clearing the + // allocator's internal cache + virtual void empty_cache() = 0; + + // Returns comprehensive statistics about the allocator's memory usage, + // allocation patterns, and timing metrics + virtual HostStats get_stats() = 0; + + // Resets the cumulative allocation statistics + virtual void reset_accumulated_stats() = 0; + + // Resets the peak memory usage metrics + virtual void reset_peak_stats() = 0; +}; + +template +struct CachingHostAllocatorInterface : public HostAllocator { + CachingHostAllocatorInterface() : impl_(std::make_unique()) {} + + at::DataPtr allocate(size_t size) override { + auto ptr_and_ctx = impl_->allocate(size); + return { + ptr_and_ctx.first, + ptr_and_ctx.second, + deleteFunc, // Use the template parameter deleter function + at::DeviceType::CPU}; + } + + void free(void* ctx) { + impl_->free(ctx); + } + + bool record_event(void* ptr, void* ctx, c10::Stream stream) override { + return impl_->record_event(ptr, ctx, stream); + } + + void empty_cache() override { + impl_->empty_cache(); + } + + void copy_data(void* dest, const void* src, std::size_t count) + const override { + impl_->copy_data(dest, src, count); + } + + HostStats get_stats() override { + return impl_->getStats(); + } + + void reset_accumulated_stats() override { + impl_->resetAccumulatedStats(); + } + + void reset_peak_stats() override { + impl_->resetPeakStats(); + } + + std::unique_ptr impl_; +}; + +#define DECLARE_HOST_ALLOCATOR(name, impl, deleter, instance) \ + void deleter(void* ptr); \ + struct name final \ + : public at::CachingHostAllocatorInterface {}; \ + static name instance; \ + void deleter(void* ptr) { \ + instance.free(ptr); \ + } + +/** + * Set the host allocator for DeviceType `device_type`. This allocator manages + * pinned memory on the host that can be accessed efficiently by the specified + * device type. Note that this function is not thread-safe. + */ +TORCH_API void setHostAllocator( + at::DeviceType device_type, + at::HostAllocator* allocator, + uint8_t priority = 0); + +TORCH_API at::HostAllocator* getHostAllocator(at::DeviceType device_type); + +template +struct HostAllocatorRegistry { + explicit HostAllocatorRegistry(HostAllocator* allocator) { + at::setHostAllocator(device_type, allocator); + } +}; + +#define REGISTER_HOST_ALLOCATOR(device_type, allocator) \ + namespace { \ + static at::HostAllocatorRegistry \ + g_host_allocator_registry_instance(allocator); \ + } + +} // namespace at +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/CheckMemoryFormat.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/CheckMemoryFormat.h new file mode 100644 index 0000000000000000000000000000000000000000..c02296ff570367ef1fdf811195b8e54116979b38 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/CheckMemoryFormat.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +namespace c10::impl { + +inline std::optional +check_tensor_options_and_extract_memory_format( + const TensorOptions& options, + std::optional memory_format) { + TORCH_CHECK( + options.requires_grad_opt() != true, + "Operators taking TensorOptions cannot take a TensorOptions with " + "options.requires_grad set as true. This isn't implemented yet."); + TORCH_CHECK( + !(options.has_memory_format() && memory_format.has_value()), + "Cannot set memory_format both in TensorOptions and explicit argument; please delete " + "the redundant setter."); + if (memory_format.has_value()) { + return memory_format; + } else { + return options.memory_format_opt(); + } +} + +} // namespace impl namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypeProperties.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypeProperties.h new file mode 100644 index 0000000000000000000000000000000000000000..2ea6e095b68839211938f983c70e06b6f2088f25 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypeProperties.h @@ -0,0 +1,144 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace at { + +class Tensor; + +// This class specifies a Backend and a ScalarType. Currently, it primarily +// serves as a replacement return value for Tensor::type(). Previously, +// Tensor::type() returned Type&, but we are changing Type to not be +// dtype-specific. +class TORCH_API DeprecatedTypeProperties { + public: + DeprecatedTypeProperties(Backend backend, ScalarType scalar_type) + : backend_(backend), scalar_type_(scalar_type) {} + + Backend backend() const { + return backend_; + } + + Layout layout() const { + return layout_from_backend(backend_); + } + + bool is_sparse() const { + return layout_from_backend(backend()) == kSparse; + } + + bool is_sparse_csr() const { + return layout_from_backend(backend()) == kSparseCsr; + } + + c10::DeviceType device_type() const { + return backendToDeviceType(backend_); + } + + bool is_cuda() const { + return backendToDeviceType(backend_) == kCUDA; + } + + ScalarType scalarType() const { + return scalar_type_; + } + + caffe2::TypeMeta typeMeta() const { + return scalarTypeToTypeMeta(scalar_type_); + } + + bool operator==(const DeprecatedTypeProperties& other) const { + return backend_ == other.backend() && scalar_type_ == other.scalarType(); + } + + bool operator!=(const DeprecatedTypeProperties& other) const { + return !(*this == other); + } + + std::string toString() const { + std::string base_str; + if (backend_ == Backend::Undefined || scalar_type_ == ScalarType::Undefined) { + base_str = "UndefinedType"; + } else { + base_str = std::string(at::toString(backend_)) + at::toString(scalar_type_) + "Type"; + } + return base_str; + } + + DeprecatedTypeProperties & toBackend(Backend b) const { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + b, scalar_type_); + } + + DeprecatedTypeProperties & toScalarType(ScalarType s) const { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + backend_, s); + } + + DeprecatedTypeProperties & cpu() const { + return toBackend(Backend::CPU); + } + + DeprecatedTypeProperties & cuda() const { + return toBackend(Backend::CUDA); + } + + DeprecatedTypeProperties & hip() const { + return toBackend(Backend::HIP); + } + + DeprecatedTypeProperties & privateUser1() const { + return toBackend(Backend::PrivateUse1); + } + + /// Constructs the `TensorOptions` from a type and a `device_index`. + TensorOptions options(int16_t device_index = -1) const { + return TensorOptions().dtype(typeMeta()) + .device(device_type(), static_cast(device_index)) + .layout(layout()); + } + + /// Constructs the `TensorOptions` from a type and a Device. Asserts that + /// the device type matches the device type of the type. + TensorOptions options(std::optional device_opt) const { + if (!device_opt.has_value()) { + return options(-1); + } else { + Device device = device_opt.value(); + AT_ASSERT(device.type() == device_type()); + return options(device.index()); + } + } + + operator TensorOptions() const { + return options(); + } + + int64_t id() const { + return static_cast(backend()) * + static_cast(ScalarType::NumOptions) + + static_cast(scalarType()); + } + + Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const; + Storage unsafeStorageFromTH(void * th_pointer, bool retain) const; + Tensor copy(const Tensor & src, bool non_blocking=false, std::optional to_device={}) const; + + private: + Backend backend_; + ScalarType scalar_type_; +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypePropertiesRegistry.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypePropertiesRegistry.h new file mode 100644 index 0000000000000000000000000000000000000000..0119d4c13efc0c17609553b5f8581a1a782eed00 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypePropertiesRegistry.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// In order to preserve bc, we make DeprecatedTypeProperties instances unique +// just like they are for Type. + +#include +#include +#include + +namespace at { + +class DeprecatedTypeProperties; + +struct TORCH_API DeprecatedTypePropertiesDeleter { + void operator()(DeprecatedTypeProperties * ptr); +}; + +class TORCH_API DeprecatedTypePropertiesRegistry { + public: + DeprecatedTypePropertiesRegistry(); + + DeprecatedTypeProperties& getDeprecatedTypeProperties(Backend p, ScalarType s) const; + +private: + // NOLINTNEXTLINE(*c-array*) + std::unique_ptr registry + [static_cast(Backend::NumOptions)] + [static_cast(ScalarType::NumOptions)]; +}; + +TORCH_API DeprecatedTypePropertiesRegistry& globalDeprecatedTypePropertiesRegistry(); + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dict.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dict.h new file mode 100644 index 0000000000000000000000000000000000000000..35b829519081d74bd90b36ba41fd79f8e5d86cdf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dict.h @@ -0,0 +1,401 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { +struct IValue; +template class Dict; +struct Type; + +namespace impl { + +using valid_dict_key_types = guts::typelist::typelist< + int64_t, + std::string, + double, + c10::complex, + bool, + at::Tensor +>; +} + +namespace detail { + +struct DictKeyHash { + size_t operator()(const IValue& ivalue) const; +}; + +struct DictKeyEqualTo { + bool operator()(const IValue& lhs, const IValue& rhs) const; +}; + +struct DictImpl final : public c10::intrusive_ptr_target { + using dict_map_type = ska_ordered::order_preserving_flat_hash_map; + struct DictElementTypes final { + TypePtr keyType; + TypePtr valueType; + }; + + explicit DictImpl(dict_map_type dict_, DictElementTypes elementTypes_) + : dict(std::move(dict_)) + , elementTypes(std::move(elementTypes_)) {} + dict_map_type dict; + + DictElementTypes elementTypes; + + intrusive_ptr copy() const; + friend TORCH_API bool operator==(const DictImpl& lhs, const DictImpl& rhs); +}; + +} + +namespace impl { +template class DictIterator; + +/** + * A reference to an entry in the Dict. + * Use the `key()` and `value()` methods to read the element. + */ +template +class DictEntryRef final { +public: + explicit DictEntryRef(Iterator iterator) + : iterator_(std::move(iterator)) {} + + decltype(auto) key() const { + return iterator_->first.template to(); + } + + decltype(auto) value() const { + return iterator_->second.template to(); + } + + template + void setValue(Value_&& value) const { + static_assert(std::is_constructible_v, "Wrong type for the value argument of setValue()"); + iterator_->second = Value(std::forward(value)); + } + ~DictEntryRef() = default; + +private: + // allow copying and moving, but only our friends (i.e. the Dict class) can do + // it. Copying/moving this reference wrapper would be too ambiguous to allow it + // in the public API. + DictEntryRef(const DictEntryRef&) = default; + DictEntryRef& operator=(const DictEntryRef&) = default; + DictEntryRef(DictEntryRef&&) noexcept = default; + DictEntryRef& operator=(DictEntryRef&& rhs) & noexcept = default; + + Iterator iterator_; + friend class DictIterator; + friend class Dict; +}; + +// this wraps map_type::iterator to make sure user code can't rely +// on it being the type of the underlying map. +template +class DictIterator final { +public: + // C++17 friendly std::iterator implementation + using iterator_category = std::forward_iterator_tag; + using value_type = DictEntryRef; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + explicit DictIterator() = default; + ~DictIterator() = default; + + DictIterator(const DictIterator& rhs): entryRef_(rhs.entryRef_) {} + DictIterator(DictIterator&& rhs) noexcept: entryRef_(std::move(rhs.entryRef_)) {} + DictIterator& operator=(const DictIterator& rhs) = default; + DictIterator& operator=(DictIterator&& rhs) noexcept { + entryRef_ = std::move(rhs.entryRef_); + return *this; + } + + DictIterator& operator++() { + ++entryRef_.iterator_; + return *this; + } + + DictIterator operator++(int) { + DictIterator copy(*this); + ++*this; + return copy; + } + + const DictEntryRef& operator*() const { + return entryRef_; + } + + const DictEntryRef* operator->() const { + return &entryRef_; + } + + friend difference_type operator-(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.entryRef_.iterator_ - rhs.entryRef_.iterator_; + } + +private: + explicit DictIterator(Iterator iterator): entryRef_(std::move(iterator)) {} + + const Iterator& get_iterator_() const { + return entryRef_.iterator_; + } + + friend bool operator==(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() == rhs.get_iterator_(); + } + + friend bool operator!=(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() != rhs.get_iterator_(); + } + + friend bool operator<(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() < rhs.get_iterator_(); + } + + friend bool operator<=(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() <= rhs.get_iterator_(); + } + + friend bool operator>(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() > rhs.get_iterator_(); + } + + friend bool operator>=(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() >= rhs.get_iterator_(); + } + + DictEntryRef entryRef_; + + friend class DictIterator; + friend class Dict; +}; + +template Dict toTypedDict(Dict dict); +template Dict toGenericDict(Dict dict); +} + +/** + * An object of this class stores a map from Key to Value. + * + * This is a pointer type. After a copy, both Dicts + * will share the same storage: + * + * > Dict a; + * > Dict b = a; + * > b.insert(3, "three"); + * > ASSERT("three" == a.at(3)); + * + * We use this class in the PyTorch kernel API because that + * allows us to do optimizations and switch out the underlying + * map implementation without breaking backwards compatibility + * for the kernel API. + */ +template +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class Dict final { +private: + static_assert((std::is_same_v && std::is_same_v) || guts::typelist::contains::value, "Invalid Key type for Dict. We only support int64_t, double, bool, and string."); + + // impl_ stores the underlying map as a ska_ordered::order_preserving_flat_hash_map. + // We intentionally don't offer conversion from/to + // order_preserving_flat_hash_map, return references to it or something like that, + // because such operations would get expensive if we switch out + // the actual map implementation. + // This is an intrusive_ptr because Dict is a pointer type. + // Invariant: This will never be a nullptr, there will always be a valid + // DictImpl. + c10::intrusive_ptr impl_; + + explicit Dict(c10::intrusive_ptr&& impl); + friend struct IValue; + template friend Dict impl::toTypedDict(Dict); + template friend Dict impl::toGenericDict(Dict); + +public: + using key_type = Key; + using mapped_type = Value; + using size_type = typename detail::DictImpl::dict_map_type::size_type; + using iterator = impl::DictIterator; + + /** + * Creates an empty dict. + */ + explicit Dict(); + + /** + * Create a generic dict with runtime type information. + * This only works for c10::impl::GenericDict and is not part of the public API + * but only supposed to be used internally by PyTorch. + */ + explicit Dict(TypePtr keyType, TypePtr valueType); + + ~Dict() = default; + + Dict(const Dict&) = default; + Dict& operator=(const Dict&) = default; + + /** + * Create a new Dict pointing to a deep copy of the same data. + * The Dict returned is a new dict with separate storage. + * Changes in it are not reflected in the original dict or vice versa. + */ + Dict copy() const; + + /** + * Returns an iterator to the first element of the container. + * If the container is empty, the returned iterator will be equal to end(). + */ + iterator begin() const; + + /** + * Returns an iterator to the element following the last element of the container. + * This element acts as a placeholder; attempting to access it results in undefined behavior. + */ + iterator end() const; + + /** + * Checks if the container has no elements. + */ + bool empty() const; + + /** + * Returns the number of elements in the container. + */ + size_type size() const; + + /** + * Erases all elements from the container. After this call, size() returns zero. + * Invalidates any references, pointers, or iterators referring to contained elements. May also invalidate past-the-end iterators. + */ + void clear() const; + + /** + * Inserts element(s) into the container, if the container doesn't already contain an element with an equivalent key. + * May invalidate any references, pointers, or iterators referring to contained elements. + * + * @return A pair consisting of an iterator to the inserted element (or to the element that prevented the insertion) and a bool denoting whether the insertion took place. + */ + template + std::pair insert(Key_&& key, Value_&& value) const; + + /** + * If an element with the given key already exists, it is overwritten with the given value. + * Otherwise, a new element with the given key and value are inserted. + * May invalidate any references, pointers, or iterators referring to contained elements. + * + * @return The bool component is true if the insertion took place and false if the assignment took place. The iterator component is pointing at the element that was inserted or updated. + */ + template + std::pair insert_or_assign(Key_&& key, Value_&& value) const; + + /** + * Removes the element pointed to by iter. + * May invalidate any references, pointers, or iterators referring to contained elements. + * The iterator iter must be valid and dereferenceable. Thus the end() iterator (which is valid, but is not dereferenceable) cannot be used as a value for iter. + */ + void erase(iterator iter) const; + + /** + * Removes the element with the given key, if it exists. + * May invalidate any references, pointers, or iterators referring to contained elements. + * + * @return The number of elements removed. This is either '1' if an element with the key existed, or '0' if it didn't. + */ + [[nodiscard]] size_t erase(const Key& key) const; + + /** + * Returns the mapped value of the element with key equivalent to key. + * If no such element exists, an exception of type std::out_of_range is thrown. + */ + Value at(const Key& key) const; + + /** + * Finds an element with key equivalent to key. + * + * @return Iterator to an element with key equivalent to key. + * If no such element is found, past-the-end (see end()) iterator is returned. + */ + iterator find(const Key& key) const; + + /** + * Checks if there is an element with key equivalent to key in the container. + * + * @return true if there is such an element, otherwise false. + */ + bool contains(const Key& key) const; + + /** + * Increase the capacity so that at least count elements can be stored without + * having to reallocate or rehash. + */ + void reserve(size_type count) const; + + /** + * Value equality comparison. This function implements Python-like semantics for + * equality: two dicts with the same identity (e.g. same pointer) trivially + * compare equal, otherwise each element is compared for equality. + */ + template + friend bool operator==( + const Dict& lhs, + const Dict& rhs); + template + friend bool operator!=( + const Dict& lhs, + const Dict& rhs); + + /** + * Identity comparison. Returns true if and only if `rhs` represents the same + * Dict object as `this`. + */ + bool is(const Dict& rhs) const; + + // private API for now because the return type will change to TypePtr + // instead of std::optional once types are mandatory. + TypePtr keyType() const; + TypePtr valueType() const; + + // [unsafe set type] + // These functions mutate the tagged type of this dictionary in place. + // There is no checking that the members of the dictionary are instances + // of the new types, nor is there a check that other IValues which + // hold references to this dictionary have the right static type. + // This functionality is used only in the unpickler, where at + // creation type the real type of the dictionary is unknown, but + // then later recovered from the static type information of the + // unpickled object. + void unsafeSetKeyType(TypePtr t); + void unsafeSetValueType(TypePtr t); +}; + +namespace impl { +// GenericDict is how IValue stores dicts. It is, however, not part of the +// public API. Kernels should use Dicts with concrete Key, Value types instead +// (maybe except for some internal prim ops). +using GenericDict = Dict; + +} +} + +namespace torch { + template using Dict = c10::Dict; +} + +#include // IWYU pragma: keep + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dict_inl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dict_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..d80e69ae61f6d15c082c04213c7a4938b928ecfa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dict_inl.h @@ -0,0 +1,213 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10 { +namespace detail { +inline bool DictKeyEqualTo::operator()(const IValue& lhs, const IValue& rhs) const { + if (lhs.isTensor() && rhs.isTensor()) { + // for tensors, we compare only by identity (following how it's done in Python). + return lhs.is(rhs); + } + // Otherwise, we first compare by identity for efficiency, then by value (see: + // [container equality]) + return _fastEqualsForContainer(lhs, rhs); +} +} + +template decltype(auto) getTypePtr(); +std::string toString(const Type& type); + +namespace impl { + +template +Dict toTypedDict(GenericDict dict) { + TORCH_INTERNAL_ASSERT(*getTypePtr() == *dict.impl_->elementTypes.keyType, "Tried to cast a Dict<", toString(*dict.impl_->elementTypes.keyType), ", ", toString(*dict.impl_->elementTypes.valueType) ,"> to a Dict<", toString(*getTypePtr()), ", ", toString(*getTypePtr()), ">. Key types mismatch."); + TORCH_INTERNAL_ASSERT(*getTypePtr() == *dict.impl_->elementTypes.valueType, "Tried to cast a Dict<", toString(*dict.impl_->elementTypes.keyType), ", ", toString(*dict.impl_->elementTypes.valueType) ,"> to a Dict<", toString(*getTypePtr()), ", ", toString(*getTypePtr()), ">. Value types mismatch."); + + return Dict(std::move(dict.impl_)); +} + +template +GenericDict toGenericDict(Dict dict) { + return GenericDict(std::move(dict.impl_)); +} +} + +namespace detail { + +inline size_t DictKeyHash::operator()(const IValue& ivalue) const { + if (ivalue.isInt()) { + return std::hash()(ivalue.toInt()); + } else if (ivalue.isString()) { + return std::hash()(ivalue.toStringView()); + } else if (ivalue.isDouble()) { + return std::hash()(ivalue.toDouble()); + } else if (ivalue.isComplexDouble()) { + return c10::hash>()(ivalue.toComplexDouble()); + } else if (ivalue.isBool()) { + return std::hash()(ivalue.toBool()); + } else if (ivalue.isTensor()) { + return std::hash()(ivalue.toTensor().unsafeGetTensorImpl()); + } else if (ivalue.isDevice()) { + return std::hash()(ivalue.toDevice()); + } else { + TORCH_CHECK(false, "Can't hash IValues with tag '", ivalue.tagKind(), "'"); + } +} + +inline intrusive_ptr DictImpl::copy() const { + return make_intrusive(dict, elementTypes); +} + +} + +template +Dict::Dict() + :Dict(make_intrusive( + detail::DictImpl::dict_map_type(), + detail::DictImpl::DictElementTypes{getTypePtr(), getTypePtr()})) { + static_assert(!std::is_same_v, "This constructor is not valid for Dict. Please use c10::impl::GenericDict(keyType, valueType) instead."); + static_assert(!std::is_same_v, "This constructor is not valid for Dict<_, IValue>. Please use c10::impl::GenericDict(keyType, valueType) instead."); +} + +template +Dict::Dict(TypePtr keyType, TypePtr valueType) +: Dict(make_intrusive( + detail::DictImpl::dict_map_type(), + detail::DictImpl::DictElementTypes {std::move(keyType), std::move(valueType)})) { + static_assert(std::is_same_v, "This constructor is only valid for c10::impl::GenericDict."); + static_assert(std::is_same_v, "This constructor is only valid for c10::impl::GenericDict."); +} + +template +Dict::Dict(c10::intrusive_ptr&& impl): impl_(std::move(impl)) {} + +template +Dict Dict::copy() const { + return Dict(impl_->copy()); +} + +template +typename Dict::iterator Dict::begin() const { + return iterator{impl_->dict.begin()}; +} + +template +typename Dict::iterator Dict::end() const { + return iterator{impl_->dict.end()}; +} + +template +bool Dict::empty() const { + return impl_->dict.empty(); +} + +template +typename Dict::size_type Dict::size() const { + return impl_->dict.size(); +} + +template +void Dict::clear() const { + impl_->dict.clear(); +} + +template +template +std::pair::iterator, bool> Dict::insert(Key_&& key, Value_&& value) const { + static_assert(std::is_constructible_v, "Wrong type for the key argument of Dict::insert"); + static_assert(std::is_constructible_v, "Wrong type for the value argument of Dict::insert"); + auto inserted = impl_->dict.emplace( + Key(std::forward(key)), + Value(std::forward(value))); + return {iterator{inserted.first}, inserted.second}; +} + +template +template +std::pair::iterator, bool> Dict::insert_or_assign(Key_&& key, Value_&& value) const { + static_assert(std::is_constructible_v, "Wrong type for the key argument of Dict::insert_or_assign"); + static_assert(std::is_constructible_v, "Wrong type for the value argument of Dict::insert_or_assign"); + auto inserted = impl_->dict.insert_or_assign( + Key(std::forward(key)), + Value(std::forward(value))); + return {iterator{inserted.first}, inserted.second}; +} + +template +void Dict::erase(iterator iter) const { + impl_->dict.erase(iter.entryRef_.iterator_); +} + +template +[[nodiscard]] size_t Dict::erase(const Key& key) const { + return impl_->dict.erase(key); +} + +template +Value Dict::at(const Key& key) const { + return impl_->dict.at(key).template to(); +} + +template +typename Dict::iterator Dict::find(const Key& key) const { + return iterator{impl_->dict.find(key)}; +} + +template +bool Dict::contains(const Key& key) const { + return end() != find(key); +} + +template +void Dict::reserve(size_type count) const { + impl_->dict.reserve(count); +} + +template +TypePtr Dict::keyType() const { + return impl_->elementTypes.keyType; +} + +template +TypePtr Dict::valueType() const { + return impl_->elementTypes.valueType; +} +template +void Dict::unsafeSetKeyType(TypePtr t) { + impl_->elementTypes.keyType = std::move(t); +} + +template +void Dict::unsafeSetValueType(TypePtr t) { + impl_->elementTypes.valueType = std::move(t); +} + +template +bool operator==(const Dict& lhs, const Dict& rhs) { + // Dicts with the same identity trivially compare equal. + if (lhs.impl_ == rhs.impl_) { + return true; + } + + // Otherwise compare the values + return *lhs.impl_ == *rhs.impl_; +} + +template +bool operator!=(const Dict& lhs, const Dict& rhs) { + return !(lhs == rhs); +} + +template +bool Dict::is(const Dict& rhs) const { + return this->impl_ == rhs.impl_; +} +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DimVector.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DimVector.h new file mode 100644 index 0000000000000000000000000000000000000000..39f8179ba188a26c8742df484be9a4d4eeaf69c0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DimVector.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace at { + +// Redeclaring 'DimVector' type and size inside 'at' namespace. +// This is done to avoid modifying every use into their 'c10' +// equivalent. + +using c10::kDimVectorStaticSize; +using c10::DimVector; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dimname.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dimname.h new file mode 100644 index 0000000000000000000000000000000000000000..eb8a834037843b434ccb60907b507d9c38acabb0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Dimname.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace at { + +enum class NameType: uint8_t { BASIC, WILDCARD }; + +struct TORCH_API Dimname { + static Dimname fromSymbol(Symbol name); + static Dimname wildcard(); + static bool isValidName(const std::string& name); + + NameType type() const { return type_; } + Symbol symbol() const { return name_; } + + bool isBasic() const { return type_ == NameType::BASIC; } + bool isWildcard() const { return type_ == NameType::WILDCARD; } + + bool matches(Dimname other) const; + std::optional unify(Dimname other) const; + + private: + Dimname(Symbol name) + : name_(name), type_(NameType::BASIC) {} + Dimname(Symbol name, NameType type) + : name_(name), type_(type) {} + + Symbol name_; + NameType type_; +}; + +using DimnameList = c10::ArrayRef; + +TORCH_API std::ostream& operator<<(std::ostream& out, const Dimname& dimname); + +inline bool operator==(const Dimname& lhs, const Dimname& rhs) { + return lhs.symbol() == rhs.symbol(); +} + +inline bool operator!=(const Dimname& lhs, const Dimname& rhs) { + return !(lhs == rhs); +} + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DistributionsHelper.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DistributionsHelper.h new file mode 100644 index 0000000000000000000000000000000000000000..1313f2f1a72e797b66257c81b302e14b05339302 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/DistributionsHelper.h @@ -0,0 +1,337 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * Distributions kernel adapted from THRandom.cpp + * The kernels try to follow std::random distributions signature + * For instance: in ATen + * auto gen = at::detail::createCPUGenerator(); + * at::uniform_real_distribution uniform(0, 1); + * auto sample = uniform(gen.get()); + * + * vs std::random + * + * std::mt19937 gen; + * std::uniform_real_distribution uniform(0, 1); + * auto sample = uniform(gen); + */ + + +namespace at { +namespace { + +/** + * Samples a discrete uniform distribution in the range [base, base+range) of type T + */ +template +struct uniform_int_from_to_distribution { + + C10_HOST_DEVICE inline uniform_int_from_to_distribution(uint64_t range, int64_t base) : range_(range), base_(base) {} + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { +#ifdef FBCODE_CAFFE2 + if (( + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) && range_ >= 1ULL << 32) +#else + if (range_ >= 1ULL << 28) // allow approx 5% skew in uniform int generation using % +#endif + { + return transformation::uniform_int_from_to(generator->random64(), range_, base_); + } else { + return transformation::uniform_int_from_to(generator->random(), range_, base_); + } + } + + private: + uint64_t range_; + int64_t base_; +}; + +/** + * Samples a discrete uniform distribution in the range [min_value(int64_t), max_value(int64_t)] + */ +template +struct uniform_int_full_range_distribution { + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + return transformation::uniform_int_full_range(generator->random64()); + } + +}; + +/** + * Samples a discrete uniform distribution in the range [0, max_value(T)] for integral types + * and [0, 2^mantissa] for floating-point types. + */ +template +struct uniform_int_distribution { + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + if constexpr (std::is_same_v || std::is_same_v) { + return transformation::uniform_int(generator->random64()); + } else { + return transformation::uniform_int(generator->random()); + } + } + +}; + +/** + * Samples a uniform distribution in the range [from, to) of type T + */ +template +struct uniform_real_distribution { + + C10_HOST_DEVICE inline uniform_real_distribution(T from, T to) : from_(from), to_(to) { + TORCH_CHECK_IF_NOT_ON_CUDA(from <= to); + TORCH_CHECK_IF_NOT_ON_CUDA(to - from <= std::numeric_limits::max()); + } + + template + C10_HOST_DEVICE inline dist_acctype operator()(RNG generator){ + if constexpr (std::is_same_v) { + return transformation::uniform_real(generator->random64(), from_, to_); + } else { + return transformation::uniform_real(generator->random(), from_, to_); + } + } + + private: + T from_; + T to_; +}; + +// The SFINAE checks introduced in #39816 looks overcomplicated and must revisited +// https://github.com/pytorch/pytorch/issues/40052 +#define DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(member) \ +template \ +struct has_member_##member \ +{ \ + typedef char yes; \ + typedef long no; \ + template static yes test(decltype(&U::member)); \ + template static no test(...); \ + static constexpr bool value = sizeof(test(0)) == sizeof(yes); \ +} + +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(next_double_normal_sample); +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(set_next_double_normal_sample); +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(next_float_normal_sample); +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(set_next_float_normal_sample); + +#define DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(TYPE) \ + \ +template ::value && \ + has_member_set_next_##TYPE##_normal_sample::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline bool maybe_get_next_##TYPE##_normal_sample(RNG* generator, ret_type* ret) { \ + if (generator->next_##TYPE##_normal_sample()) { \ + *ret = *(generator->next_##TYPE##_normal_sample()); \ + generator->set_next_##TYPE##_normal_sample(std::optional()); \ + return true; \ + } \ + return false; \ +} \ + \ +template ::value || \ + !has_member_set_next_##TYPE##_normal_sample::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline bool maybe_get_next_##TYPE##_normal_sample(RNG* /*generator*/, ret_type* /*ret*/) { \ + return false; \ +} \ + \ +template ::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline void maybe_set_next_##TYPE##_normal_sample(RNG* generator, ret_type cache) { \ + generator->set_next_##TYPE##_normal_sample(cache); \ +} \ + \ +template ::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline void maybe_set_next_##TYPE##_normal_sample(RNG* /*generator*/, ret_type /*cache*/) { \ +} + +DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(double) +DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(float) + +/** + * Samples a normal distribution using the Box-Muller method + * Takes mean and standard deviation as inputs + * Note that Box-muller method returns two samples at a time. + * Hence, we cache the "next" sample in the CPUGeneratorImpl class. + */ +template +struct normal_distribution { + + C10_HOST_DEVICE inline normal_distribution(T mean_in, T stdv_in) : mean(mean_in), stdv(stdv_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(stdv_in >= 0, "stdv_in must be positive: ", stdv_in); + } + + template + C10_HOST_DEVICE inline dist_acctype operator()(RNG generator){ + dist_acctype ret; + // return cached values if available + if constexpr (std::is_same_v) { + if (maybe_get_next_double_normal_sample(generator, &ret)) { + return transformation::normal(ret, mean, stdv); + } + } else { + if (maybe_get_next_float_normal_sample(generator, &ret)) { + return transformation::normal(ret, mean, stdv); + } + } + // otherwise generate new normal values + uniform_real_distribution uniform(0.0, 1.0); + const dist_acctype u1 = uniform(generator); + const dist_acctype u2 = uniform(generator); + const dist_acctype r = ::sqrt(static_cast(-2.0) * ::log1p(-u2)); + const dist_acctype theta = static_cast(2.0) * c10::pi * u1; + if constexpr (std::is_same_v) { + maybe_set_next_double_normal_sample(generator, r * ::sin(theta)); + } else { + maybe_set_next_float_normal_sample(generator, r * ::sin(theta)); + } + ret = r * ::cos(theta); + return transformation::normal(ret, mean, stdv); + } + + private: + T mean; + T stdv; +}; + +template +struct DiscreteDistributionType { using type = float; }; + +template <> struct DiscreteDistributionType { using type = double; }; + +/** + * Samples a bernoulli distribution given a probability input + */ +template +struct bernoulli_distribution { + + C10_HOST_DEVICE inline bernoulli_distribution(T p_in) : p(p_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(p_in >= 0 && p_in <= 1); + } + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::bernoulli(uniform(generator), p); + } + + private: + T p; +}; + +/** + * Samples a geometric distribution given a probability input + */ +template +struct geometric_distribution { + + C10_HOST_DEVICE inline geometric_distribution(T p_in) : p(p_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(p_in > 0 && p_in < 1); + } + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::geometric(uniform(generator), p); + } + + private: + T p; +}; + +/** + * Samples an exponential distribution given a lambda input + */ +template +struct exponential_distribution { + + C10_HOST_DEVICE inline exponential_distribution(T lambda_in) : lambda(lambda_in) {} + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::exponential(uniform(generator), lambda); + } + + private: + T lambda; +}; + +/** + * Samples a cauchy distribution given median and sigma as inputs + */ +template +struct cauchy_distribution { + + C10_HOST_DEVICE inline cauchy_distribution(T median_in, T sigma_in) : median(median_in), sigma(sigma_in) {} + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::cauchy(uniform(generator), median, sigma); + } + + private: + T median; + T sigma; +}; + +/** + * Samples a lognormal distribution + * Takes mean and standard deviation as inputs + * Outputs two samples at a time + */ +template +struct lognormal_distribution { + + C10_HOST_DEVICE inline lognormal_distribution(T mean_in, T stdv_in) : mean(mean_in), stdv(stdv_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(stdv_in > 0); + } + + template + C10_HOST_DEVICE inline T operator()(RNG generator){ + normal_distribution normal(mean, stdv); + return transformation::log_normal(normal(generator)); + } + + private: + T mean; + T stdv; +}; +} +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Formatting.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Formatting.h new file mode 100644 index 0000000000000000000000000000000000000000..8c9a935edb89ed5f126b769889f43c7ab4e81c12 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Formatting.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace c10 { +TORCH_API std::ostream& operator<<(std::ostream& out, Backend b); +TORCH_API std::ostream& operator<<(std::ostream & out, const Scalar& s); +TORCH_API std::string toString(const Scalar& s); +} +namespace at { + +TORCH_API std::ostream& operator<<(std::ostream& out, const DeprecatedTypeProperties& t); +TORCH_API std::ostream& print( + std::ostream& stream, + const Tensor& tensor, + int64_t linesize); +inline std::ostream& operator<<(std::ostream & out, const Tensor & t) { + return print(out,t,80); +} +TORCH_API void print(const Tensor & t, int64_t linesize=80); +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Generator.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Generator.h new file mode 100644 index 0000000000000000000000000000000000000000..0f312f4e2139f6ee258407fe54d609e101f60f85 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Generator.h @@ -0,0 +1,194 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +// For the record I don't think this is a correct pimpl idiom. +// Including Impl header in interface header defeats the purpose +// because you can't change Impl private members without forcing +// everything that included the interface to rebuild. +// Impl should be forward-declared in the interface header instead. +#include + +/** + * Note [Generator] + * ~~~~~~~~~~~~~~~~ + * A Pseudo Random Number Generator (PRNG) is an engine that uses an algorithm to + * generate a seemingly random sequence of numbers, that may be later be used in creating + * a random distribution. Such an engine almost always maintains a state and requires a + * seed to start off the creation of random numbers. Often times, users have + * found it beneficial to be able to explicitly create, retain, and destroy + * PRNG states and also be able to have control over the seed value. + * + * A Generator in ATen gives users the ability to read, write and modify a PRNG engine. + * For instance, it does so by letting users seed a PRNG engine, fork the state of the + * engine, etc. + * + * By default, there is one generator per device, and a device's generator is + * lazily created. A user can use the torch.Generator() api to create their own generator. + */ + +/** + * Note [Acquire lock when using random generators] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Generator and its derived classes are NOT thread-safe. Please note that most of the + * places where we have inserted locking for generators are historically based, and we + * haven't actually checked that everything is truly thread safe (and it probably isn't). + * Please use the public mutex_ when using any methods from these classes, except for the + * read-only methods. You can learn about the usage by looking into the unittests + * (aten/src/ATen/cpu_generator_test.cpp) and other places where we have used lock_guard. + * + * TODO: Look into changing the threading semantics of Generators in ATen (e.g., making + * them non-thread safe and instead making the generator state splittable, to accommodate + * forks into other threads). + */ + +namespace at { + +class Tensor; + +struct TORCH_API Generator { + Generator() = default; + + explicit Generator(c10::intrusive_ptr gen_impl) + : impl_(std::move(gen_impl)) { + TORCH_CHECK(impl_.get(), "GeneratorImpl with nullptr is not supported"); + } + + bool operator==(const Generator& rhs) const { + return this->impl_ == rhs.impl_; + } + + bool operator!=(const Generator& rhs) const { + return !((*this) == rhs); + } + + bool defined() const { + return static_cast(impl_); + } + + c10::GeneratorImpl* unsafeGetGeneratorImpl() const { + return impl_.get(); + } + + c10::GeneratorImpl* unsafeReleaseGeneratorImpl() { + return impl_.release(); + } + + const c10::intrusive_ptr& getIntrusivePtr() const { + return impl_; + } + + void set_current_seed(uint64_t seed) { impl_->set_current_seed(seed); } + // Sets the offset of Generator state to the desired offset. This is currently + // supported for only Philox based Generators, i.e., CUDA and MPS. + void set_offset(uint64_t offset) { impl_->set_offset(offset); } + + // Returns the offset of Generator state. This is currently supported for only + // Philox based Generators, i.e., CUDA and MPS. + uint64_t get_offset() const { return impl_->get_offset(); } + + uint64_t current_seed() const { return impl_->current_seed(); } + + uint64_t seed() { return impl_->seed(); } + + // Implementation not inlined to prevent cycle reference between + // `ATen/core/Generator.h` and `ATen/core/Tensor.h` + void set_state(const at::Tensor& new_state); + + at::Tensor get_state() const; + + void graphsafe_set_state(const Generator& new_state); + + Generator graphsafe_get_state() const; + + std::mutex& mutex() { + return impl_->mutex_; + } + + DispatchKeySet key_set() const { + return impl_->key_set(); + } + + Device device() const { return impl_->device(); } + + inline void set_pyobj(PyObject* pyobj) const noexcept { + impl_->set_pyobj(pyobj); + } + + inline PyObject* pyobj() const noexcept { + return impl_->pyobj(); + } + + template + T* get() const { return static_cast(impl_.get()); } + + Generator clone() const { + return Generator(impl_->clone()); + } + + private: + c10::intrusive_ptr impl_; +}; + +template +Generator make_generator(Args&&... args) { + return Generator(c10::make_intrusive(std::forward(args)...)); +} + +/** + * Utility function to static cast input Generator* to + * the backend generator type (CPU/CUDAGeneratorImpl etc.) + */ +template +inline T * check_generator(std::optional gen) { + TORCH_CHECK(gen.has_value(), "Expected Generator but received nullopt"); + TORCH_CHECK(gen->defined(), "Generator with undefined implementation is not allowed"); + TORCH_CHECK(T::device_type() == gen->device().type(), "Expected a '", T::device_type(), "' device type for generator but found '", gen->device().type(), "'"); + return gen->get(); +} + +/** + * Utility function used in tensor implementations, which + * supplies the default generator to tensors, if an input generator + * is not supplied. The input Generator* is also static casted to + * the backend generator type (CPU/CUDAGeneratorImpl etc.) + */ +template +inline T* get_generator_or_default(const std::optional& gen, const Generator& default_gen) { + return gen.has_value() && gen->defined() ? check_generator(gen) : check_generator(default_gen); +} + +namespace detail { + +/** + * Helper function for checking the validity of new random generator + * state. Right now following conditions are checked: + * + * - The new state tensor must be a torch.ByteTensor + * - Data of the new state tensor must be contiguous + */ +inline void check_rng_state(const c10::TensorImpl& new_state) { + TORCH_CHECK_TYPE( + new_state.layout() == kStrided && new_state.device().type() == kCPU && new_state.dtype() == kByte, + "RNG state must be a torch.ByteTensor" + ); + + TORCH_CHECK(new_state.is_contiguous(), "RNG state must be contiguous"); +} + +} // namespace detail + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h new file mode 100644 index 0000000000000000000000000000000000000000..a5c82221aadfc7c310ef7eac00cfb4a17708fe21 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { + +using GeneratorFuncType = std::function; + +TORCH_API std::optional& GetGeneratorPrivate(); + +class TORCH_API _GeneratorRegister { + public: + explicit _GeneratorRegister(const GeneratorFuncType& func); +}; + +TORCH_API at::Generator GetGeneratorForPrivateuse1( + c10::DeviceIndex device_index); + +/** + * This is used to register Generator to PyTorch for `privateuse1` key. + * + * Usage: REGISTER_GENERATOR_PRIVATEUSE1(MakeGeneratorForPrivateuse1) + * + * class CustomGeneratorImpl : public c10::GeneratorImpl { + * CustomGeneratorImpl(DeviceIndex device_index = -1); + * explicit ~CustomGeneratorImpl() override = default; + * ... + * }; + * + * at::Generator MakeGeneratorForPrivateuse1(c10::DeviceIndex id) { + * return at::make_generator(id); + * } + */ + +#define REGISTER_GENERATOR_PRIVATEUSE1(GeneratorPrivate) \ + static auto temp##GeneratorPrivate = at::_GeneratorRegister(GeneratorPrivate); + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/IListRef.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/IListRef.h new file mode 100644 index 0000000000000000000000000000000000000000..694907d44ff219fe903c5afb42baedee9d518001 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/IListRef.h @@ -0,0 +1,638 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +/* + * [Note: IListRef] + * Wrapper around different API containers (e.g. boxed and unboxed). + * + * What is it? + * =========== + * It is a tagged union of both boxed and unboxed API containers. + * Working implementations: + * + * - `IListRef` + * - `IListRef` + * + * Note that `IListRef` is a view type. Meaning that it won't own the + * tensors it holds. It's intended to be used only as argument parameters. + * Specifically, where these 2 worlds overlap. + * + * What is this for? + * ================= + * Historically, PyTorch has maintained 2 different APIs: the unboxed + * (called from C++ API and Python eager mode) and boxed APIs (called + * from the TorchScript JIT, mobile interpreter, and boxed fallbacks). + * + * Calling unboxed kernels from the boxed "world" and vice-versa may + * result in non-negligible overhead. Lists are one of those types: + * + * - Boxed world: `c10::List` + * - Unboxed world: `c10::ArrayRef` + * + * In this context, `c10::IListRef` solves this problem by wrapping those + * 2 container types, so that we don't need to convert from one to + * the other. + * + * (see https://github.com/pytorch/pytorch/issues/66328) + * + * What does it do? + * ================ + * This container wraps around the different tagged containers + * (currently, only boxed and unboxed), without incurring in extra + * overhead for converting from one to another. It does so while + * exposing usual container methods, which dispatch to corresponding + * implementations. + * + * While it works with different container types, it introduces + * overhead for repeatedly calling member functions (since those will + * get dispatched, again). Therefore, you should only use it to iterate + * through the list up to one time. If you need to do more complex things, + * call `materialize()` first. + * + * Adding support for a new Tag + * ============================ + * Suppose we want to add a new tag: `Chest`. Here are the steps + * we would have to go through: + * + * 1. Add a line for it in the macro `TORCH_ILISTREF_FORALL_TAGS`. + * + * #define TORCH_ILISTREF_FORALL_TAGS(_, ...) \ + * ... + * _(Chest, ##__VA_ARGS__) + * + * 2. Add type aliases, union members, and constructors. + * + * template + * class IListRef { + * ... + * using chest_type = + * typename detail::IListRefTagImpl::list_type; + * ... + * IListRef(...) : tag_(IListRefTag::Chest) { + * ... + * } + * ... + * union Payload { + * ... + * chest_type chest; + * ... + * }; + * ... + * }; + * + * 3. Add a default implementation for it (in 'IListRef_inl.h'). It's + * preferable to make the default implementation work for `T = Tensor` + * (both `Unboxed` and `Boxed` do it). + * + * template + * class IListRefTagImplBase { + * public: + * using elem_type = ListElemT; + * using list_type = ChestContainer; + * + * static const list_type& unwrap(const IListRef& ilist) { ... } + * + * static typename list_type::const_iterator& unwrap( + * IListRefIterator& it) { ... } + * + * static const typename list_type::const_iterator& unwrap( + * const IListRefIterator& it) { ... } + * + * static IListRefConstRef iterator_get( + * const typename list_type::const_iterator& it) { ... } + * } + * + * 4. Add an specialization for each of the already supported types. + * Finally, for consistency, add them to the tracking list. + * (see [Note: IListRefTagImpl Specializations]) + * + * template <> + * class IListRefTagImpl + * : public IListRefTagImplBase {}; + * + * Adding support for a new Type + * ============================= + * Suppose we want to add support for a new type: `Matrix`. + * Here are the steps we would have to go through: + * + * 1. Add an specialization for each of the existing tags. + * For consistency, add them to the tracking list. + * (see [Note: IListRefTagImpl Specializations]) + * + * template <> + * class IListRefTagImpl + * : public IListRefTagImplBase {}; + * + * template <> + * class IListRefTagImpl + * : public IListRefTagImplBase {}; + * + * Common Problems + * =============== + * 1. One of `IListRef(Iterator)` methods are failing to compile. + * + * That may be happening because the container type you added + * is not compatible with the code written for that method. If + * that's true, then you might have to transform that code into + * a static method call (see `List::operator[]` method). + * + * 2. Can't make `IListRefIterator::operator*` return a const-reference. + * + * First, keep in mind that we assume that boxed containers will + * have to deal with `IValue` (e.g. `c10::List`). In this context, + * what may be happening is that `IValue` doesn't store internally + * your type `T`. Instead, it constructs a type new `T` every time + * you try to get `T` for it (see `IListRef`). + */ + +namespace c10 { +template +class IListRef; + +/* + * Applies arbitrary macros to each `IListRefTag`. + */ +#define TORCH_ILISTREF_FORALL_TAGS(_, ...) \ + _(Unboxed, ##__VA_ARGS__) \ + _(Boxed, ##__VA_ARGS__) \ + _(Materialized, ##__VA_ARGS__) + +/* + * Defines a "switch-case" for `TAG`. Inside, it executes `BODY`, + * while bringing to scope: + * + * - `ImplT`: the implementation class for `TAG` + * - `this_`: the result of unwrapping `this` + */ +#define TORCH_ILISTREF_UNWRAP_CASE(TAG, BODY) \ + case c10::IListRefTag::TAG: { \ + using ImplT = c10::detail::IListRefTagImpl; \ + auto& this_ = ImplT::unwrap(*this); \ + BODY \ + } break; + +/* + * Dispatches the unwrap call, depending on `TAG`, followed by + * the execution of `BODY`. It aborts if `TAG` is not a `IListRefTag`. + * + * This macro is useful because it allows us to handle different + * types (that correspond to different tags) to be implemented + * only once. We can do it even when the implementation of the + * different tags aren't syntactically the same, by dispatching + * it to a function (e.g. `ImplT::(this_)`). + */ +#define TORCH_ILISTREF_UNWRAP(TAG, BODY) \ + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") \ + switch (TAG) { \ + TORCH_ILISTREF_FORALL_TAGS(TORCH_ILISTREF_UNWRAP_CASE, BODY) \ + break; \ + default: \ + TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag."); \ + } \ + C10_DIAGNOSTIC_POP() + +enum class IListRefTag { +#define DEFINE_TAG(tag, ...) tag, + TORCH_ILISTREF_FORALL_TAGS(DEFINE_TAG) +#undef DEFINE_TAG + None +}; + +namespace detail { +/* + * Type alias that specifies whether we return a reference or a copy of `T`. + * + * What is this for? + * ================= + * Since values in the boxed world are represented by an `IValue`, we also + * depend on whether it can be converted to a const-reference (`Tensor`) or + * has to create a new copy of `T` (`OptionalTensorRef`). + */ +template +using IListRefConstRef = typename ivalue_to_const_ref_overload_return::type; + +/* + * Interface that implements key functions for each `IListRefTag` type. + * + * What is this for? + * ================= + * Given an `IListRef(Iterator)`, some methods have to be implemented + * differently for each `TAG`. Therefore, the methods inside this class + * are used as dispatch targets for the different `IListRefTag` values. + * + * You should create an specialization of this class for each possible + * combination of `IListRefTag` type (except `None`) and element types + * (e.g. `Tensor`). + * + * What does it do? + * ================ + * 1. defines static methods to be used as dispatch targets by both + * `IListRef` and `IListRefIterator` (see the implementation of + * `IListRefTagImplBase`). + * + * 2. defines the `elem_type` and `list_type` aliases that will be + * used in the definition of `IListRef`. In general, we should do + * so by inheriting from `IListRefTagImplBase`. + * + * [Note: IListRefTagImpl Specialization] + * ====================================== + * For `IListRef(Iterator)`: + * - + * - + * - + * + * For `IListRef(Iterator)`: + * - + * - + * - + */ +template +class IListRefTagImpl {}; + +/* + * Base implementation of `IListRefTagImpl` methods. + * + * What is this for? + * ================= + * This should make adding specializations for new types easier. For + * example, one should be able to add a new type just by making its + * `IListRefTagImpl` specialization inherit from `IListRefTagImplBase`. + * + * You should create a partial specialization for this class only if + * you introduce a new `IListRefTag`. The idea being that there is one + * default implementation for each possible value of `IListRefTag`. + * + * What does it do? + * ================ + * 1. defines `elem_type` as an alias to `ListElemT`. + * + * 1. defines `list_type` as an alias to the default container type + * that will hold a collection of `elem_type`. The idea being that + * all types tagged as `TAG` will have `list_type` as its container, + * with different `elem_type`. + * + * 3. defines the default implementation for each of the methods that + * are supposed to be defined on `IListRefTagImpl` specializations. + * + * 4. inheriting from `IListRefTagImplBase` also means + * that the payload of the type `IListRef` will be of type `list_type` + * when it is tagged as `TAG`. + */ +template +class IListRefTagImplBase {}; + +/* + * Materialized container for `IListRef`. + * + * What is this for? + * ================= + * Container that groups `T` references together. This exchanges the + * overhead of every method call from `IListRef` for a dynamic allocation. + * + * You should use this container instead of `IListRef` if: + * + * - You are going to iterate the list more than once + * - You need to repeatedly access arbitrary elements (using `operator[]`) + * What does it do? + + * ================ + * Removes the reference (&) from the type, and wraps it into a + * `std::reference_wrapper`. If `IListRefConstRef` is not a + * reference type, then it's left unchanged. + */ +template +using _MaterializedIListRefElem = std::conditional_t< + std::is_reference_v, + typename std::reference_wrapper>, + T>; + +template +using MaterializedIListRefElem = _MaterializedIListRefElem>; + +template +using MaterializedIListRef = std::vector>; + +} // namespace detail + +/* + * Iterator for `IListRef`. + * + * What is it? + * =========== + * Currently, a `std::bidirectional_iterator` that wraps the iterator + * types defined for each of the `IListRefTag`. + * + * One should be able to use it, as if it were the unwrapped + * iterators themselves. + + * What does it do? + * ================ + * Similarly to `IListRef`, this is a wrapper class. Specifically, it + * wraps each container's `const_iterator` type alias. So, for example, + * given that the container for `IListRefTag::Boxed` is `c10::List`, this + * iterator will wrap a `c10::List::const_iterator`. + * + * [Note: MSVC Iterator Debug] + * =========================== + * MSVC `vector::iterator` implementation (used in the boxed variant) + * makes it so this union's destructor, copy-constructor (assignment), and + * move-constructor (assignment) are implicitly deleted. + * + * Therefore, we need to explicitly define them as needed. Follows a list + * of places where these are needed and their reason: + * + * - `Payload` destructor: + * it is deleted only if the macro `_ITERATOR_DEBUG_LEVEL` is set to 2. + * + * - `IListRefIterator` destructor: + * same as above. However, we need to explicitly call the variant + * destructor explicitly. + * + * - `IListRefIterator` copy-constructor: + * it is deleted only if the macro `_ITERATOR_DEBUG_LEVEL` is different + * than 0. + */ +template +class IListRefIterator { + private: +#define DEFINE_FRIEND_CLASS(TAG, ...) \ + friend class detail::IListRefTagImpl; \ + friend class detail::IListRefTagImplBase< \ + IListRefTag::TAG, \ + T, \ + typename detail::IListRefTagImpl::elem_type>; + TORCH_ILISTREF_FORALL_TAGS(DEFINE_FRIEND_CLASS) +#undef DEFINE_FRIEND_CLASS + + public: + // C++17 friendly std::iterator implementation + using iterator_category = std::bidirectional_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + using unboxed_iterator_type = typename detail:: + IListRefTagImpl::list_type::const_iterator; + using boxed_iterator_type = typename detail:: + IListRefTagImpl::list_type::const_iterator; + using materialized_iterator_type = + typename detail::MaterializedIListRef::const_iterator; + + IListRefIterator() : tag_(IListRefTag::None) {} + +#if defined(_MSC_VER) && _ITERATOR_DEBUG_LEVEL != 0 + // See [Note: MSVC Iterator Debug] + IListRefIterator(const IListRefIterator& iterator) + : tag_(iterator.tag_) { + switch (tag_) { + case IListRefTag::Boxed: + payload_.boxed_iterator = iterator.payload_.boxed_iterator; + break; + case IListRefTag::Unboxed: + payload_.unboxed_iterator = iterator.payload_.unboxed_iterator; + break; + case IListRefTag::Materialized: + payload_.materialized_iterator = iterator.payload_.materialized_iterator; + break; + default: + TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag."); + } + } +#endif + +#if defined(_MSC_VER) && _ITERATOR_DEBUG_LEVEL == 2 + // See [Note: MSVC Iterator Debug] + ~IListRefIterator() noexcept(false) { + switch (tag_) { + case IListRefTag::Boxed: + payload_.boxed_iterator.~boxed_iterator_type(); + break; + case IListRefTag::Unboxed: + payload_.unboxed_iterator.~unboxed_iterator_type(); + break; + case IListRefTag::Materialized: + payload_.materialized_iterator.~materialized_iterator_type(); + break; + default: + TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag."); + } + } +#endif + + IListRefIterator(boxed_iterator_type boxed) : tag_(IListRefTag::Boxed) { + payload_.boxed_iterator = boxed; + } + + IListRefIterator(unboxed_iterator_type unboxed) : tag_(IListRefTag::Unboxed) { + payload_.unboxed_iterator = unboxed; + } + + IListRefIterator(materialized_iterator_type materialized) : tag_(IListRefTag::Materialized) { + payload_.materialized_iterator = materialized; + } + + detail::IListRefConstRef operator*() const { + TORCH_ILISTREF_UNWRAP(tag_, { return ImplT::iterator_get(this_); }); + } + + IListRefIterator& operator++() { + TORCH_ILISTREF_UNWRAP(tag_, { ++this_; }); + return *this; + } + + IListRefIterator operator++(int) { + auto old = *this; + TORCH_ILISTREF_UNWRAP(tag_, { ++this_; }); + return old; + } + + IListRefIterator& operator--() { + TORCH_ILISTREF_UNWRAP(tag_, { --this_; }); + return *this; + } + + IListRefIterator operator--(int) { + auto old = *this; + TORCH_ILISTREF_UNWRAP(tag_, { --this_; }); + return old; + } + + bool operator==(const IListRefIterator& rhs) const { + if (tag_ != rhs.tag_) { + return false; + } + TORCH_ILISTREF_UNWRAP(tag_, { + auto& rhs_it = ImplT::unwrap(rhs); + return this_ == rhs_it; + }); + } + + bool operator!=(const IListRefIterator& rhs) const { + return !(*this == rhs); + } + + private: + union Payload { + boxed_iterator_type boxed_iterator; + unboxed_iterator_type unboxed_iterator; + materialized_iterator_type materialized_iterator; + void* _init_ptr; + Payload() : _init_ptr(nullptr) {} +#if defined(_MSC_VER) + // See [Note: MSVC Iterator Debug] + ~Payload() {} +#endif + }; + + Payload payload_; + IListRefTag tag_; +}; + +/* + * See [Note: IListRef] + */ +template +class IListRef { + private: +#define DEFINE_FRIEND_CLASS(TAG, ...) \ + friend class detail::IListRefTagImpl; \ + friend class detail::IListRefTagImplBase< \ + IListRefTag::TAG, \ + T, \ + typename detail::IListRefTagImpl::elem_type>; + TORCH_ILISTREF_FORALL_TAGS(DEFINE_FRIEND_CLASS) +#undef DEFINE_FRIEND_CLASS + + public: + using unboxed_type = + typename detail::IListRefTagImpl::list_type; + using boxed_type = + typename detail::IListRefTagImpl::list_type; + using materialized_type = + typename detail::MaterializedIListRef; + + using iterator = IListRefIterator; + using const_iterator = IListRefIterator; + using reverse_iterator = std::reverse_iterator; + using value_type = typename iterator::value_type; + + IListRef() : tag_(IListRefTag::None) {} + + IListRef(const boxed_type& boxed) : tag_(IListRefTag::Boxed) { + payload_.boxed = &boxed; + } + + IListRef(const unboxed_type& unboxed) : tag_(IListRefTag::Unboxed) { + payload_.unboxed = unboxed; + } + + IListRef(const std::initializer_list& list) : tag_(IListRefTag::Unboxed) { + payload_.unboxed = at::ArrayRef(list); + } + + template < + typename... UnboxedConstructorArgs, + typename = std::enable_if_t< + std::is_constructible_v>> + IListRef(UnboxedConstructorArgs&&... args) : tag_(IListRefTag::Unboxed) { + payload_.unboxed = unboxed_type(std::forward(args)...); + } + + IListRef(const materialized_type& materialized) : tag_(IListRefTag::Materialized) { + payload_.materialized = &materialized; + } + + size_t size() const { + TORCH_ILISTREF_UNWRAP(tag_, { return this_.size(); }); + } + + bool empty() const { + return size() == 0; + } + + iterator begin() const { + TORCH_ILISTREF_UNWRAP(tag_, { return this_.begin(); }); + } + + iterator end() const { + TORCH_ILISTREF_UNWRAP(tag_, { return this_.end(); }); + } + + detail::IListRefConstRef front() const { + TORCH_ILISTREF_UNWRAP(tag_, { return ImplT::front(this_); }); + } + + /* + * Materializes the `IListRef` into a `std::vector`. + * + * This should be used when one wishes to either: + * + * - iterate over the list more than once: each `IListRefIterator` + * member function call has to go through a switch, introducing + * non-negligible overhead + * + * - randomly access an arbitrary element using `operator[]`: + * same reason as above + */ + detail::MaterializedIListRef materialize() const { + if (isMaterialized()) { + return toMaterialized(); + } + + detail::MaterializedIListRef materialized; + materialized.reserve(size()); + for (const auto& t : *this) { + materialized.emplace_back(t); + } + return materialized; + } + +#define DEFINE_CHECK(TAG, ...) \ + bool is##TAG() const { \ + return tag_ == IListRefTag::TAG; \ + } + TORCH_ILISTREF_FORALL_TAGS(DEFINE_CHECK) +#undef DEFINE_CHECK + + bool isNone() const { + return tag_ == IListRefTag::None; + } + +#define DEFINE_CASTING(TAG, ...) \ + const typename detail::IListRefTagImpl::list_type& \ + to##TAG() const { \ + TORCH_INTERNAL_ASSERT(is##TAG()); \ + return detail::IListRefTagImpl::unwrap(*this); \ + } + TORCH_ILISTREF_FORALL_TAGS(DEFINE_CASTING) +#undef DEFINE_CASTING + + private: + union Payload { + const boxed_type* boxed; + unboxed_type unboxed; + const materialized_type* materialized; + Payload() : boxed(nullptr) {} + }; + + Payload payload_; + IListRefTag tag_; +}; + +} // namespace c10 + +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/IListRef_inl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/IListRef_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..98bf272763e98ee82db4c88cfeae2588f1085c2e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/IListRef_inl.h @@ -0,0 +1,208 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { +class Tensor; +class OptionalTensorRef; +} + + +namespace c10::detail { + +/* + * Specializations of `IListRefTagImplBase` that implement the default + * implementation for `IListRefTag::Unboxed`. + */ +template +class IListRefTagImplBase { + public: + using elem_type = ListElemT; + using list_type = ArrayRef; + + /* + * These `unwrap` static methods unwraps the inner containers out + * of `IListRef` (and `IListRefIterator`). They are required when + * the macro `TORCH_ILISTREF_UNWRAP` is called. + */ + static const list_type& unwrap(const IListRef& ilist) { + return ilist.payload_.unboxed; + } + + static typename list_type::const_iterator& unwrap(IListRefIterator& it) { + return it.payload_.unboxed_iterator; + } + + static const typename list_type::const_iterator& unwrap( + const IListRefIterator& it) { + return it.payload_.unboxed_iterator; + } + + /* + * We have these function (besides the `unwrap`s above) because the + * implementation for both `IListRef::operator[]` and `IListRefIterator::operator*` + * weren't syntactically equal for the existing tags at the time + * (`Unboxed` and `Boxed`). + */ + static IListRefConstRef front(const list_type& lst) { + return lst.front(); + } + + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + return *it; + } +}; + +/* + * Specializations of `IListRefTagImplBase` that implement the default + * implementation for `IListRefTag::Boxed`. + */ +template +class IListRefTagImplBase { + public: + using elem_type = ListElemT; + using list_type = List; + + static const list_type& unwrap(const IListRef& ilist) { + return *ilist.payload_.boxed; + } + + static typename list_type::const_iterator& unwrap(IListRefIterator& it) { + return it.payload_.boxed_iterator; + } + + static const typename list_type::const_iterator& unwrap( + const IListRefIterator& it) { + return it.payload_.boxed_iterator; + } + + static IListRefConstRef front(const list_type& lst) { + return lst[0]; + } + + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + return (*it).get().toTensor(); + } +}; + +/* + * Specializations of `IListRefTagImplBase` that implement the default + * implementation for `IListRefTag::Materialized`. + */ +template +class IListRefTagImplBase> { + public: + using elem_type = MaterializedIListRefElem; + using list_type = MaterializedIListRef; + + static const list_type& unwrap(const IListRef& ilist) { + return *ilist.payload_.materialized; + } + + static typename list_type::const_iterator& unwrap(IListRefIterator& it) { + return it.payload_.materialized_iterator; + } + + static const typename list_type::const_iterator& unwrap( + const IListRefIterator& it) { + return it.payload_.materialized_iterator; + } + + static IListRefConstRef front(const list_type& lst) { + return lst[0]; + } + + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + return *it; + } +}; + +/* + * [Note: ITensorListRef] + * Specializations necessary for `IListRef` type. + * + * Since the default implementations are usually done with supporting + * `Tensor` in mind, we only have to inherit from the base implementations. + */ +template <> +class IListRefTagImpl + : public IListRefTagImplBase {}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase {}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase< + IListRefTag::Materialized, + at::Tensor, + MaterializedIListRefElem> {}; + +/* + * [Note: IOptTensorListRef] + * Specializations necessary for `IListRef` type. + * + * We can't get an `at::OptionalTensorRef` directly from an instance of + * `List>` (the type that corresponds to the boxed world). + * + * So, the default implementation won't help us. Thus, we have to implement + * this method ourselves. + */ +template <> +class IListRefTagImpl + : public IListRefTagImplBase {}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase> { + + public: + /* + * Given an instance of the types corresponding to the `Boxed` tag, we override + * the default implementation, so that we can return a `at::OptionalTensorRef`. + */ + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdangling-reference") + const auto& ivalue = (*it).get(); + C10_DIAGNOSTIC_POP() + if (!ivalue.isNone()) { + const auto& tensor = ivalue.toTensor(); + return (tensor.defined()) ? tensor : at::OptionalTensorRef{}; + } + return {}; + } +}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase< + IListRefTag::Materialized, + at::OptionalTensorRef, + MaterializedIListRefElem> {}; + +} // namespace c10::detail + + +namespace at { + +// [Note: ITensorListRef] +using ITensorListRef = c10::IListRef; +using ITensorListRefIterator = c10::IListRefIterator; +using MaterializedITensorListRef = c10::detail::MaterializedIListRef; +// [Note: IOptTensorListRef] +using IOptTensorListRef = c10::IListRef; +using IOptTensorListRefIterator = c10::IListRefIterator; +using MaterializedIOptTensorListRef = c10::detail::MaterializedIListRef; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..26802a959251e74cdeb7fd0601ceb6fc098702c4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h @@ -0,0 +1,116 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// The legacy mechanism for dispatching operators in ATen is a Type +// object, which is essentially a giant virtual dispatch table +// for every operation we support dynamically dispatching over. +// +// This has been deprecated in favor of ATenDispatch, and in the future, +// c10 dispatcher. +// TODO: Clean up what remains here + +#include + +namespace at { + +// A RAII, thread local (!) guard that will disable dispatch to variable +// handler. +// +// NOTE [ Treating Variables as non-Variables in type dispatch ] +// +// What exactly does AutoDispatchBelowAutograd do? The short answer is, it causes +// dispatches on ATen functions to go to the non-variable implementation, +// bypassing autograd handling (and also profiling and tracing). +// +// To understand why this guard exists, it's helpful to understand the history +// behind how Variable was implemented. Previously, Variables were implemented +// as a wrapper on Tensors; so the act of processing a Variable involved +// unwrapping the underlying Tensor, and then calling the underlying base +// operation on /that/ operation +// +// However, after the Variable/Tensor merge, there is no concept of unwrapping +// a tensor anymore. If you just call the operation on the same variable +// again inside your VariableType handler, you'll dispatch back to +// VariableType, which is not what we want. +// +// The solution to the above problem is to add `at::AutoDispatchBelowAutograd`, which +// when enabled will cause `legacyTensorType()` and `getType()` to always return +// non-Variable type, even if the tensor being called on is a variable. + +/* Note [AutoDispatchBelowAutograd] + * AutoDispatchBelowAutograd is **INTERNAL ONLY** that it should be used + * for kernel implementations and customized C++ kernels. + * If you are looking for a guard to run workload in inference mode, please use + * c10::InferenceMode RAII which is user facing API. + * In the past AutoDispatchBelowAutograd(or its old version AutoNonVariableTypeMode) + * was used in the user code for inference-only workload, this was under risk of + * producing wrong results silently in some edge cases. For example: + * ``` + * torch::Tensor s = torch::ones({1, 2, 3}).set_requires_grad(true); + * torch::Tensor out = s * s; + * { + * at::AutoDispatchBelowAutograd guard; + * s.add_(1); // Skips version bump on `s`. + * } + * // WRONG GRADIENT! s.grad() are now computed using `s` value after the + * // inplace update. + * out.backward(torch::ones_like(out)); + * ``` + * Users should use `c10::InferenceMode` here so that it'll properly throw an + * error saying "one of the variables needed for gradient computation has be modified." + */ +struct TORCH_API AutoDispatchBelowAutograd { + AutoDispatchBelowAutograd() : + autograd_guard_(c10::autograd_dispatch_keyset) { + } + + // disable all autograd dispatch keys + c10::impl::ExcludeDispatchKeyGuard autograd_guard_; +}; + +// TODO: AutoNonVariableTypeMode should be removed in release 1.10. +struct TORCH_API AutoNonVariableTypeMode { + AutoNonVariableTypeMode(bool enabled = true) : + autograd_guard_(c10::autograd_dispatch_keyset) { + TORCH_WARN_ONCE("AutoNonVariableTypeMode is deprecated and will be removed in 1.10 release. " + "For kernel implementations please use AutoDispatchBelowADInplaceOrView instead, " + "If you are looking for a user facing API to enable running your inference-only " + "workload, please use c10::InferenceMode. Using AutoDispatchBelowADInplaceOrView in user code " + "is under risk of producing silent wrong result in some edge cases. " + "See Note [AutoDispatchBelowAutograd] for more details."); + TORCH_INTERNAL_ASSERT(enabled); + } + + // disable all autograd dispatch keys + c10::impl::ExcludeDispatchKeyGuard autograd_guard_; +}; + +struct TORCH_API AutoDispatchSkipFunctionalize { + AutoDispatchSkipFunctionalize() : + dispatch_key_guard_(c10::DispatchKeySet(c10::DispatchKey::Functionalize)) { + } + c10::impl::ExcludeDispatchKeyGuard dispatch_key_guard_; +}; + +/* Note [AutoDispatchBelowADInplaceOrView] + * AutoDispatchBelowADInplaceOrView is equivalent to AutoNonVariableTypeMode + * before we split inplace & view ops out of VariableType kernel. + * Note this guard is used in VariableType kernels for functional ops + * as well as ADInplaceOrView kernels for inplace/view ops to enforce the + * Invariant: + * Once you are in VariableType/ADInplaceOrView kernel for an op, + * you never go back to a kernel on same dispatch key until + * you finish the current op. + */ +struct TORCH_API AutoDispatchBelowADInplaceOrView { + AutoDispatchBelowADInplaceOrView() : + dispatch_key_guard_(c10::autograd_dispatch_keyset_with_ADInplaceOrView) { + } + // disable Autograd & ADInplaceOrView dispatch keys + c10::impl::ExcludeDispatchKeyGuard dispatch_key_guard_; +}; +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/List.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/List.h new file mode 100644 index 0000000000000000000000000000000000000000..f109430c5427471d494c4b8081ef519ee8329972 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/List.h @@ -0,0 +1,496 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +class Tensor; +} +namespace c10 { +struct IValue; +template class List; +struct Type; + +namespace detail { + +struct ListImpl final : public c10::intrusive_ptr_target { + using list_type = std::vector; + + explicit TORCH_API ListImpl(list_type list_, TypePtr elementType_); + + list_type list; + + TypePtr elementType; + + intrusive_ptr copy() const { + return make_intrusive(list, elementType); + } + friend TORCH_API bool operator==(const ListImpl& lhs, const ListImpl& rhs); +}; +} + +namespace impl { + +template class ListIterator; + +template class ListElementReference; + +template +void swap(ListElementReference&& lhs, ListElementReference&& rhs) noexcept; + +template +bool operator==(const ListElementReference& lhs, const T& rhs); + +template +bool operator==(const T& lhs, const ListElementReference& rhs); + +template +struct ListElementConstReferenceTraits { + // In the general case, we use IValue::to(). + using const_reference = typename c10::detail::ivalue_to_const_ref_overload_return::type; +}; + +// There is no to() overload for std::optional. +template<> +struct ListElementConstReferenceTraits> { + using const_reference = std::optional>; +}; + +template +class ListElementReference final { +public: + operator std::conditional_t< + std::is_reference_v::type>, + const T&, + T>() const; + + ListElementReference& operator=(T&& new_value) &&; + + ListElementReference& operator=(const T& new_value) &&; + + // assigning another ref to this assigns the underlying value + ListElementReference& operator=(ListElementReference&& rhs) && noexcept; + + const IValue& get() const& { + return *iterator_; + } + + friend void swap(ListElementReference&& lhs, ListElementReference&& rhs) noexcept; + + ListElementReference(const ListElementReference&) = delete; + ListElementReference& operator=(const ListElementReference&) = delete; + ~ListElementReference() = default; + +private: + ListElementReference(Iterator iter) + : iterator_(iter) {} + + // allow moving, but only our friends (i.e. the List class) can move us + ListElementReference(ListElementReference&&) noexcept = default; + ListElementReference& operator=(ListElementReference&& rhs) & noexcept { + iterator_ = std::move(rhs.iterator_); + return *this; + } + + friend class List; + friend class ListIterator; + + Iterator iterator_; +}; + +// this wraps vector::iterator to make sure user code can't rely +// on it being the type of the underlying vector. +template +class ListIterator final { + public: + // C++17 friendly std::iterator implementation + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = ListElementReference; + + explicit ListIterator() = default; + ~ListIterator() = default; + + ListIterator(const ListIterator&) = default; + ListIterator(ListIterator&&) noexcept = default; + ListIterator& operator=(const ListIterator&) = default; + ListIterator& operator=(ListIterator&&) noexcept = default; + + ListIterator& operator++() { + ++iterator_; + return *this; + } + + ListIterator operator++(int) { + ListIterator copy(*this); + ++*this; + return copy; + } + + ListIterator& operator--() { + --iterator_; + return *this; + } + + ListIterator operator--(int) { + ListIterator copy(*this); + --*this; + return copy; + } + + ListIterator& operator+=(typename List::size_type offset) { + iterator_ += offset; + return *this; + } + + ListIterator& operator-=(typename List::size_type offset) { + iterator_ -= offset; + return *this; + } + + ListIterator operator+(typename List::size_type offset) const { + return ListIterator{iterator_ + offset}; + } + + ListIterator operator-(typename List::size_type offset) const { + return ListIterator{iterator_ - offset}; + } + + friend difference_type operator-(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ - rhs.iterator_; + } + + ListElementReference operator*() const { + return {iterator_}; + } + + ListElementReference operator[](typename List::size_type offset) const { + return {iterator_ + offset}; + } + +private: + explicit ListIterator(Iterator iterator): iterator_(std::move(iterator)) {} + + Iterator iterator_; + + friend bool operator==(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ == rhs.iterator_; + } + + friend bool operator!=(const ListIterator& lhs, const ListIterator& rhs) { + return !(lhs == rhs); + } + + friend bool operator<(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ < rhs.iterator_; + } + + friend bool operator<=(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ <= rhs.iterator_; + } + + friend bool operator>(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ > rhs.iterator_; + } + + friend bool operator>=(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ >= rhs.iterator_; + } + + friend class ListIterator; + friend class List; +}; + +template List toTypedList(List list); +template List toList(List&& list); +template List toList(const List& list); +const IValue* ptr_to_first_element(const List& list); +} + +/** + * An object of this class stores a list of values of type T. + * + * This is a pointer type. After a copy, both Lists + * will share the same storage: + * + * > List a; + * > List b = a; + * > b.push_back("three"); + * > ASSERT("three" == a.get(0)); + * + * We use this class in the PyTorch kernel API instead of + * std::vector, because that allows us to do optimizations + * and switch out the underlying list implementation without + * breaking backwards compatibility for the kernel API. + */ +template +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class List final { +private: + // This is an intrusive_ptr because List is a pointer type. + // Invariant: This will never be a nullptr, there will always be a valid + // ListImpl. + c10::intrusive_ptr impl_; + + using internal_reference_type = impl::ListElementReference; + using internal_const_reference_type = typename impl::ListElementConstReferenceTraits::const_reference; + +public: + using value_type = T; + using size_type = typename c10::detail::ListImpl::list_type::size_type; + using iterator = impl::ListIterator; + using const_iterator = impl::ListIterator; + using reverse_iterator = impl::ListIterator; + + /** + * Constructs an empty list. + */ + explicit List(); + + /** + * Constructs a list with some initial values. + * Example: + * List a({2, 3, 4}); + */ + List(std::initializer_list initial_values); + explicit List(ArrayRef initial_values); + + /** + * Create a generic list with runtime type information. + * This only works for c10::impl::GenericList and is not part of the public API + * but only supposed to be used internally by PyTorch. + */ + explicit List(TypePtr elementType); + + List(const List&) = default; + List& operator=(const List&) = default; + ~List() = default; + + /** + * Create a new List pointing to a deep copy of the same data. + * The List returned is a new list with separate storage. + * Changes in it are not reflected in the original list or vice versa. + */ + List copy() const; + + /** + * Returns the element at specified location pos, with bounds checking. + * If pos is not within the range of the container, an exception of type std::out_of_range is thrown. + */ + internal_const_reference_type get(size_type pos) const; + + /** + * Moves out the element at the specified location pos and returns it, with bounds checking. + * If pos is not within the range of the container, an exception of type std::out_of_range is thrown. + * The list contains an invalid element at position pos afterwards. Any operations + * on it before re-setting it are invalid. + */ + value_type extract(size_type pos) const; + + /** + * Returns a reference to the element at specified location pos, with bounds checking. + * If pos is not within the range of the container, an exception of type std::out_of_range is thrown. + * + * You cannot store the reference, but you can read it and assign new values to it: + * + * List list = ...; + * list[2] = 5; + * int64_t v = list[1]; + */ + internal_const_reference_type operator[](size_type pos) const; + + internal_reference_type operator[](size_type pos); + + /** + * Assigns a new value to the element at location pos. + */ + void set(size_type pos, const value_type& value) const; + + /** + * Assigns a new value to the element at location pos. + */ + void set(size_type pos, value_type&& value) const; + + /** + * Returns an iterator to the first element of the container. + * If the container is empty, the returned iterator will be equal to end(). + */ + iterator begin() const; + + /** + * Returns an iterator to the element following the last element of the container. + * This element acts as a placeholder; attempting to access it results in undefined behavior. + */ + iterator end() const; + + /** + * Checks if the container has no elements. + */ + bool empty() const; + + /** + * Returns the number of elements in the container + */ + size_type size() const; + + /** + * Increase the capacity of the vector to a value that's greater or equal to new_cap. + */ + void reserve(size_type new_cap) const; + + /** + * Erases all elements from the container. After this call, size() returns zero. + * Invalidates any references, pointers, or iterators referring to contained elements. Any past-the-end iterators are also invalidated. + */ + void clear() const; + + /** + * Inserts value before pos. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator insert(iterator pos, const T& value) const; + + /** + * Inserts value before pos. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator insert(iterator pos, T&& value) const; + + /** + * Inserts a new element into the container directly before pos. + * The new element is constructed with the given arguments. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + template + iterator emplace(iterator pos, Args&&... value) const; + + /** + * Appends the given element value to the end of the container. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void push_back(const T& value) const; + + /** + * Appends the given element value to the end of the container. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void push_back(T&& value) const; + + /** + * Appends the given list to the end of the container. Uses at most one memory allocation. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void append(List lst) const; + + /** + * Appends the given element value to the end of the container. + * The new element is constructed with the given arguments. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + template + void emplace_back(Args&&... args) const; + + /** + * Removes the element at pos. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator erase(iterator pos) const; + + /** + * Removes the elements in the range [first, last). + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator erase(iterator first, iterator last) const; + + /** + * Removes the last element of the container. + * Calling pop_back on an empty container is undefined. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void pop_back() const; + + /** + * Resizes the container to contain count elements. + * If the current size is less than count, additional default-inserted elements are appended. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void resize(size_type count) const; + + /** + * Resizes the container to contain count elements. + * If the current size is less than count, additional copies of value are appended. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void resize(size_type count, const T& value) const; + + /** + * Value equality comparison. This function implements Python-like semantics for + * equality: two lists with the same identity (e.g. same pointer) trivially + * compare equal, otherwise each element is compared for equality. + */ + template + friend bool operator==(const List& lhs, const List& rhs); + + template + friend bool operator!=(const List& lhs, const List& rhs); + + /** + * Identity comparison. Returns true if and only if `rhs` represents the same + * List object as `this`. + */ + bool is(const List& rhs) const; + + std::vector vec() const; + + /** + * Returns the number of Lists currently pointing to this same list. + * If this is the only instance pointing to this list, returns 1. + */ + // TODO Test use_count + size_t use_count() const; + + TypePtr elementType() const; + + // See [unsafe set type] for why this exists. + void unsafeSetElementType(TypePtr t); + +private: + explicit List(c10::intrusive_ptr&& elements); + explicit List(const c10::intrusive_ptr& elements); + friend struct IValue; + template friend List impl::toTypedList(List); + template friend List impl::toList(List&&); + template friend List impl::toList(const List&); + friend const IValue* impl::ptr_to_first_element(const List& list); +}; + +namespace impl { +// GenericList is how IValue stores lists. It is, however, not part of the +// public API. Kernels should use Lists with concrete types instead +// (maybe except for some internal prim ops). +using GenericList = List; + +} +} + +namespace torch { + template using List = c10::List; +} + +#include // IWYU pragma: keep + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/List_inl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/List_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..55c1e24c25707009608a16be209661ad9b0c827f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/List_inl.h @@ -0,0 +1,358 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10 { + +template decltype(auto) getTypePtr(); +std::string toString(const Type& type); + +template +List::List(c10::intrusive_ptr&& elements) +: impl_(std::move(elements)) {} + +template +List::List(const c10::intrusive_ptr& elements) +: impl_(elements) {} + +template +List::List() +: List(make_intrusive( + typename c10::detail::ListImpl::list_type(), + getTypePtr())) { + static_assert(!std::is_same_v, "This constructor is not valid for List. Please use c10::impl::GenericList(elementType) instead."); +} + +template +List::List(ArrayRef values) +: List(make_intrusive( + typename c10::detail::ListImpl::list_type(), + getTypePtr())) { + static_assert(!std::is_same_v, "This constructor is not valid for List. Please use c10::impl::GenericList(elementType)."); + impl_->list.reserve(values.size()); + for (const T& element : values) { + impl_->list.push_back(element); + } +} + +template +List::List(std::initializer_list initial_values) +: List(ArrayRef(initial_values)) { + static_assert(!std::is_same_v, "This constructor is not valid for List. Please use c10::impl::GenericList(elementType)."); +} + +template +List::List(TypePtr elementType) +: List(make_intrusive( + typename c10::detail::ListImpl::list_type(), + std::move(elementType))) { + static_assert(std::is_same_v || std::is_same_v>, + "This constructor is only valid for c10::impl::GenericList or List."); +} + +namespace impl { +template +List toTypedList(impl::GenericList list) { + // If there's other instances of the list (i.e. list.use_count() > 1), then we have to be invariant + // because upcasting would allow people to add types into the new list that would break the old list. + // However, if there aren't any other instances of this list (i.e. list.use_count() == 1), then we can + // allow upcasting. This can be a perf improvement since we can cast List to List> + // without having to copy it. This is also used to provide backwards compatibility with some old models + // that serialized the index arguments to aten::index, aten::index_put, aten::index_put_ and aten::index_put_impl_ + // as List before we changed that argument to be List>. When deserializing, we + // have list.use_count() == 1 and can deserialize the List directly as List>. + TORCH_CHECK(*list.impl_->elementType == *getTypePtr() + || (list.use_count() == 1 && list.impl_->elementType->isSubtypeOf(*getTypePtr())) + , "Tried to cast a List<", toString(*list.impl_->elementType), "> to a List<", toString(*getTypePtr()), ">. Types mismatch."); + return List(std::move(list.impl_)); +} + +template +impl::GenericList toList(List&& list) { + return GenericList(std::move(list.impl_)); +} +template +impl::GenericList toList(const List& list) { + return GenericList(list.impl_); +} +} + +template +List List::copy() const { + return List(impl_->copy()); +} + +namespace detail { + template + T list_element_to(T element) { + return element; + } + template + T list_element_to(const IValue& element) { + return element.template to(); + } + template + T list_element_to(IValue&& element) { + return std::move(element).template to(); + } + template + struct ListElementFrom { + static IValue from(const T& element) { + return element; + } + static IValue from(T&& element) { + return std::move(element); + } + }; + template<> + struct ListElementFrom { + static const IValue& from(const IValue& element) { + return element; + } + static IValue&& from(IValue&& element) { + return std::move(element); + } + }; +} + +namespace impl { + +template +ListElementReference::operator std::conditional_t< + std::is_reference_v::type>, + const T&, + T>() const { + return iterator_->template to(); +} + +template +ListElementReference& ListElementReference::operator=(T&& new_value) && { + *iterator_ = c10::detail::ListElementFrom::from(std::move(new_value)); + return *this; +} + +template +ListElementReference& ListElementReference::operator=(const T& new_value) && { + *iterator_ = c10::detail::ListElementFrom::from(new_value); + return *this; +} + +template +ListElementReference& ListElementReference::operator=(ListElementReference&& rhs) && noexcept { + *iterator_ = *rhs.iterator_; + return *this; +} + +template +void swap(ListElementReference&& lhs, ListElementReference&& rhs) noexcept { + std::swap(*lhs.iterator_, *rhs.iterator_); +} + +template +bool operator==(const ListElementReference& lhs, const T& rhs) { + const T& lhs_tmp = lhs; + return lhs_tmp == rhs; +} + +template +inline bool operator==(const T& lhs, const ListElementReference& rhs) { + return rhs == lhs; +} + +template +inline typename ListElementConstReferenceTraits::const_reference +list_element_to_const_ref(const IValue& element) { + return element.template to(); +} + +template<> +inline typename ListElementConstReferenceTraits>::const_reference +list_element_to_const_ref>(const IValue& element) { + return element.toOptionalStringRef(); +} + +} // namespace impl + +template +void List::set(size_type pos, const value_type& value) const { + impl_->list.at(pos) = c10::detail::ListElementFrom::from(value); +} + +template +void List::set(size_type pos, value_type&& value) const { + impl_->list.at(pos) = c10::detail::ListElementFrom::from(std::move(value)); +} + +template +typename List::internal_const_reference_type List::get(size_type pos) const { + return operator[](pos); +} + +template +typename List::internal_const_reference_type List::operator[](size_type pos) const { + return c10::impl::list_element_to_const_ref(impl_->list.at(pos)); +} + +template +typename List::internal_reference_type List::operator[](size_type pos) { + static_cast(impl_->list.at(pos)); // Throw the exception if it is out of range. + return {impl_->list.begin() + static_castlist)::difference_type>(pos)}; +} + +template +typename List::value_type List::extract(size_type pos) const { + auto& elem = impl_->list.at(pos); + auto result = c10::detail::list_element_to(std::move(elem)); + // Reset the list element to a T() instead of None to keep it correctly typed + elem = c10::detail::ListElementFrom::from(T{}); + return result; +} + +template +typename List::iterator List::begin() const { + return iterator(impl_->list.begin()); +} + +template +typename List::iterator List::end() const { + return iterator(impl_->list.end()); +} + +template +bool List::empty() const { + return impl_->list.empty(); +} + +template +typename List::size_type List::size() const { + return impl_->list.size(); +} + +template +void List::reserve(size_type new_cap) const { + impl_->list.reserve(new_cap); +} + +template +void List::clear() const { + impl_->list.clear(); +} + +template +typename List::iterator List::insert(iterator pos, const T& value) const { + return iterator { impl_->list.insert(pos.iterator_, c10::detail::ListElementFrom::from(value)) }; +} + +template +typename List::iterator List::insert(iterator pos, T&& value) const { + return iterator { impl_->list.insert(pos.iterator_, c10::detail::ListElementFrom::from(std::move(value))) }; +} + +template +template +typename List::iterator List::emplace(iterator pos, Args&&... value) const { + // TODO Use list_element_from? + return iterator { impl_->list.emplace(pos.iterator_, std::forward(value)...) }; +} + +template +void List::push_back(const T& value) const { + impl_->list.push_back(c10::detail::ListElementFrom::from(value)); +} + +template +void List::push_back(T&& value) const { + impl_->list.push_back(c10::detail::ListElementFrom::from(std::move(value))); +} + +template +void List::append(List b) const { + if (b.use_count() == 1) { + impl_->list.insert(impl_->list.end(), make_move_iterator(b.impl_->list.begin()), make_move_iterator(b.impl_->list.end())); + } else { + impl_->list.insert(impl_->list.end(), b.impl_->list.begin(), b.impl_->list.end()); + } +} + +template +template +void List::emplace_back(Args&&... args) const { + // TODO Use list_element_from? + impl_->list.push_back(T(std::forward(args)...)); +} + +template +typename List::iterator List::erase(iterator pos) const { + return iterator { impl_->list.erase(pos.iterator_) }; +} + +template +typename List::iterator List::erase(iterator first, iterator last) const { + return iterator { impl_->list.erase(first.iterator_, last.iterator_) }; +} + +template +void List::pop_back() const { + impl_->list.pop_back(); +} + +template +void List::resize(size_type count) const { + impl_->list.resize(count, T{}); +} + +template +void List::resize(size_type count, const T& value) const { + impl_->list.resize(count, value); +} + +template +bool operator==(const List& lhs, const List& rhs) { + // Lists with the same identity trivially compare equal. + if (lhs.impl_ == rhs.impl_) { + return true; + } + + // Otherwise, just compare values directly. + return *lhs.impl_ == *rhs.impl_; +} + +template +bool operator!=(const List& lhs, const List& rhs) { + return !(lhs == rhs); +} + +template +bool List::is(const List& rhs) const { + return this->impl_ == rhs.impl_; +} + +template +std::vector List::vec() const { + std::vector result(begin(), end()); + return result; +} + +template +size_t List::use_count() const { + return impl_.use_count(); +} + +template +TypePtr List::elementType() const { + return impl_->elementType; +} + +template +void List::unsafeSetElementType(TypePtr t) { + impl_->elementType = std::move(t); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/MT19937RNGEngine.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/MT19937RNGEngine.h new file mode 100644 index 0000000000000000000000000000000000000000..7eb7a2c4bdc17d6d4df0d12a9c97c7dbbccd8485 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/MT19937RNGEngine.h @@ -0,0 +1,199 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +// define constants like M_PI and C keywords for MSVC +#ifdef _MSC_VER +#ifndef _USE_MATH_DEFINES +#define _USE_MATH_DEFINES +#endif +#include +#endif + +#include +#include +#include + +namespace at { + +constexpr int MERSENNE_STATE_N = 624; +constexpr int MERSENNE_STATE_M = 397; +constexpr uint32_t MATRIX_A = 0x9908b0df; +constexpr uint32_t UMASK = 0x80000000; +constexpr uint32_t LMASK = 0x7fffffff; + +/** + * Note [Mt19937 Engine implementation] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Originally implemented in: + * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/MTARCOK/mt19937ar-cok.c + * and modified with C++ constructs. Moreover the state array of the engine + * has been modified to hold 32 bit uints instead of 64 bits. + * + * Note that we reimplemented mt19937 instead of using std::mt19937 because, + * at::mt19937 turns out to be faster in the pytorch codebase. PyTorch builds with -O2 + * by default and following are the benchmark numbers (benchmark code can be found at + * https://github.com/syed-ahmed/benchmark-rngs): + * + * with -O2 + * Time to get 100000000 philox randoms with at::uniform_real_distribution = 0.462759s + * Time to get 100000000 at::mt19937 randoms with at::uniform_real_distribution = 0.39628s + * Time to get 100000000 std::mt19937 randoms with std::uniform_real_distribution = 0.352087s + * Time to get 100000000 std::mt19937 randoms with at::uniform_real_distribution = 0.419454s + * + * std::mt19937 is faster when used in conjunction with std::uniform_real_distribution, + * however we can't use std::uniform_real_distribution because of this bug: + * http://open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#2524. Plus, even if we used + * std::uniform_real_distribution and filtered out the 1's, it is a different algorithm + * than what's in pytorch currently and that messes up the tests in tests_distributions.py. + * The other option, using std::mt19937 with at::uniform_real_distribution is a tad bit slower + * than at::mt19937 with at::uniform_real_distribution and hence, we went with the latter. + * + * Copyright notice: + * A C-program for MT19937, with initialization improved 2002/2/10. + * Coded by Takuji Nishimura and Makoto Matsumoto. + * This is a faster version by taking Shawn Cokus's optimization, + * Matthe Bellew's simplification, Isaku Wada's real version. + * + * Before using, initialize the state by using init_genrand(seed) + * or init_by_array(init_key, key_length). + * + * Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Any feedback is very welcome. + * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + * email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + */ + +/** + * mt19937_data_pod is used to get POD data in and out + * of mt19937_engine. Used in torch.get_rng_state and + * torch.set_rng_state functions. + */ +struct mt19937_data_pod { + uint64_t seed_; + int left_; + bool seeded_; + uint32_t next_; + std::array state_; +}; + +class mt19937_engine { +public: + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + inline explicit mt19937_engine(uint64_t seed = 5489) { + init_with_uint32(seed); + } + + inline mt19937_data_pod data() const { + return data_; + } + + inline void set_data(const mt19937_data_pod& data) { + data_ = data; + } + + inline uint64_t seed() const { + return data_.seed_; + } + + inline bool is_valid() { + if ((data_.seeded_ == true) + && (data_.left_ > 0 && data_.left_ <= MERSENNE_STATE_N) + && (data_.next_ <= MERSENNE_STATE_N)) { + return true; + } + return false; + } + + inline uint32_t operator()() { + if (--(data_.left_) == 0) { + next_state(); + } + uint32_t y = *(data_.state_.data() + data_.next_++); + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680; + y ^= (y << 15) & 0xefc60000; + y ^= (y >> 18); + + return y; + } + +private: + mt19937_data_pod data_; + + inline void init_with_uint32(uint64_t seed) { + data_.seed_ = seed; + data_.seeded_ = true; + data_.state_[0] = seed & 0xffffffff; + for (const auto j : c10::irange(1, MERSENNE_STATE_N)) { + data_.state_[j] = (1812433253 * (data_.state_[j-1] ^ (data_.state_[j-1] >> 30)) + j); + } + data_.left_ = 1; + data_.next_ = 0; + } + + inline uint32_t mix_bits(uint32_t u, uint32_t v) { + return (u & UMASK) | (v & LMASK); + } + + inline uint32_t twist(uint32_t u, uint32_t v) { + return (mix_bits(u,v) >> 1) ^ (v & 1 ? MATRIX_A : 0); + } + + inline void next_state() { + uint32_t* p = data_.state_.data(); + data_.left_ = MERSENNE_STATE_N; + data_.next_ = 0; + + for(int j = MERSENNE_STATE_N - MERSENNE_STATE_M + 1; --j; p++) { + *p = p[MERSENNE_STATE_M] ^ twist(p[0], p[1]); + } + + for(int j = MERSENNE_STATE_M; --j; p++) { + *p = p[MERSENNE_STATE_M - MERSENNE_STATE_N] ^ twist(p[0], p[1]); + } + + *p = p[MERSENNE_STATE_M - MERSENNE_STATE_N] ^ twist(p[0], data_.state_[0]); + } + +}; + +typedef mt19937_engine mt19937; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/NamedTensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/NamedTensor.h new file mode 100644 index 0000000000000000000000000000000000000000..890373220b0a34e893e9fd950a93229f22eea137 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/NamedTensor.h @@ -0,0 +1,148 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { + +class TensorBase; + +// XXX: This file exists because TensorImpl is in c10, but Dimname is in ATen. +// Due to the c10/ATen library split, TensorImpl cannot depend on Dimname, +// so we have a couple of workarounds. +// +// In the long term, we'll move Dimname to c10 and everything in this file +// can be refactored out. The main blocker for that is that "c10::Symbol" +// actually exists outside of c10 and needs to be moved in. + +// TensorImpl has a unique_ptr field. +// XXX: Ideally we would just put std::optional> into TensorImpl. +// +// This class has an important invariant: there must be at least ONE +// non-wildcard +struct TORCH_API NamedTensorMeta final : public c10::NamedTensorMetaInterface { + // This enum is to remind people that the invariant on constructors is that + // the list of dimnames must have at least one non-wildcard + enum HAS_NON_WILDCARD { + HasNonWildcard + }; + + explicit NamedTensorMeta(HAS_NON_WILDCARD /*unused*/, DimnameList names) + : names_(names.vec()) { + check_invariants(); + } + explicit NamedTensorMeta(HAS_NON_WILDCARD /*unused*/, std::vector&& names) + : names_(std::move(names)) { + check_invariants(); + } + + std::unique_ptr clone() const override { + return std::make_unique(HasNonWildcard, names_); + } + + DimnameList names() const { return names_; } + + // Used for an assertion in TensorImpl.h + int64_t slow_dim() const override { + return static_cast(names_.size()); + } + + void check_invariants() const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + std::any_of(names_.begin(), names_.end(), [](const Dimname& n) { return !n.isWildcard(); })); + } + + void set_names(HAS_NON_WILDCARD /*unused*/, DimnameList new_names) { + TORCH_INTERNAL_ASSERT(new_names.size() == names_.size()); + std::copy(new_names.begin(), new_names.end(), names_.begin()); + check_invariants(); + } + + void set_names(HAS_NON_WILDCARD /*unused*/, std::vector&& new_names) { + TORCH_INTERNAL_ASSERT(new_names.size() == names_.size()); + names_ = std::move(new_names); + check_invariants(); + } + + // INVARIANT: at least one Dimname is non-WILDCARD + std::vector names_; +}; + +// When NamesMode is disabled, then all operations ignore tensors' names fields. +// Concretely speaking, all tensors are treated as having nullopt names. +struct TORCH_API NamesMode { + static bool is_enabled(); + static void set_enabled(bool enabled); +}; + + +// A RAII, thread local (!) guard that enables or disables names upon +// construction, and sets it back to the original value upon destruction. +struct TORCH_API NoNamesGuard { + NoNamesGuard() : prev_mode(NamesMode::is_enabled()) { + NamesMode::set_enabled(false); + } + NoNamesGuard(const NoNamesGuard&) = delete; + NoNamesGuard(NoNamesGuard&&) = delete; + NoNamesGuard& operator=(const NoNamesGuard&) = delete; + NoNamesGuard& operator=(NoNamesGuard&&) = delete; + ~NoNamesGuard() { + if (initialized) { + reset(); + } + } + void reset() { + TORCH_INTERNAL_ASSERT(initialized); + NamesMode::set_enabled(prev_mode); + } + private: + bool prev_mode; + bool initialized{true}; +}; + +void check_names_valid_for(const TensorBase& tensor, DimnameList names); +void check_names_valid_for(size_t tensor_dim, DimnameList names); + +// Sets the names of `tensor` to be `names`. +TORCH_API const TensorBase& internal_set_names_inplace(const TensorBase& tensor, std::optional names); +TORCH_API const TensorBase& internal_set_names_inplace(const TensorBase& tensor, std::vector&& names, bool validate_names); + +constexpr size_t kMaxNamedTensorDim = 64; + +DimnameList default_names(size_t len); + +namespace impl { + +// Some helper functions on TensorImpl. Useful for working with names in TH. +// XXX: Ideally these would exist as methods on TensorImpl +TORCH_API void internal_set_names_inplace(TensorImpl* impl, std::optional names, bool validate_names); +TORCH_API void internal_set_names_inplace(TensorImpl* impl, std::vector&& names, bool validate_names); + +void check_names_valid_for(TensorImpl* impl, DimnameList names); + +// Returns true if the tensor's names exist and are not all 'None'. +// Returns false if the tensor's names don't exist (were not allocated), +// or if all names are 'None'. +// We treat not-allocated-names the same as allocated names that are all 'None'. +TORCH_API bool has_names(const TensorImpl* impl); + +// Returns the names of the tensor's dimensions. +// Unnamed tensors are treated as having 'None' in all dimension; this method +// would return a DimnameList of all 'None's for an unnamed tensor. +TORCH_API DimnameList get_names(const TensorImpl* impl); + +// This is more of an implementation detail; one should use impl::get_names / +// Tensor::names() whenever possible because it provides a cleaner API. +// Returns the names of the tensor if they have been allocated; returns nullopt +// instead if the haven't been. The names of a tensor are not allocated if a +// tensor is constructed with names=None. +TORCH_API std::optional get_opt_names(const TensorImpl* impl); + +} // namespace impl + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..5104fa756ff6d9dea41b70d4140b1122ddf1bc14 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h @@ -0,0 +1,192 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// The motivating usecase for this is to represent the ragged size structure +// of a jagged tensor [B, [s_0, s_1, s_2], D] as a single integer j0. This +// allows us to simply return [B, j0, D] if someone queries for the size of our +// tensor. +// +// Morally we define comparison between two nested ints to return true if +// that comparison holds for all corresponding elements of the arrays they +// represent. Comparison between a nested int and a plain int is defined +// similarly. +// +// To simulate this desired behavior but also avoid the O(N) cost of checking, +// we associate each raggedness pattern with an integer "id" that can be used as +// a proxy to evaluate equality. We also constrain the range of values for this +// as to enable inequality checks. +// +// We also support a positive integer scalar "coeff" that is used for computing +// strides. For example given, a [B, j0, D] tensor, it can be strided in two +// different ways: [D * j0, D, 1] and [j0, 1, sum(j0)]. The coeff is used to +// differentiate the two cases. +// +// During tracing the strides of the outputs need to be a function of the size +// and strides of the inputs so it is important that NestedIntSymNode itself is +// able to express this. +class TORCH_API NestedIntSymNodeImpl : public SymNodeImpl { + public: + // CAUTION: you should probably not be constructing these directly; please + // the higher-level API in python instead (TODO: actually introduce that). + explicit NestedIntSymNodeImpl(int64_t val, int64_t coeff) + : val_(val), coeff_(coeff) {} + + bool bool_() override { + return false; + } + + bool is_int() override { + return true; + } + + bool is_float() override { + return false; + } + + bool is_bool() override { + return false; + } + + bool is_nested_int() const override { + return true; + } + + bool has_hint() override { + return true; + } + + c10::SymNode wrap_int(int64_t num) override { + return SymNode(c10::make_intrusive>(num)); + } + + int64_t guard_int(const char* file, int64_t line) override { + TORCH_CHECK(false); + } + + double guard_float(const char* file, int64_t line) override { + TORCH_CHECK(false, "not a float"); + } + + bool guard_bool(const char* file, int64_t line) override { + TORCH_CHECK(false, "not a bool"); + } + + int64_t int_() override { + TORCH_CHECK(false); + } + + std::string str() override { + if (coeff_ == 1) { + return "j" + std::to_string(val_); + } + return std::to_string(coeff_) + "*j" + std::to_string(val_); + } + + // NOTE [ Inequalities with nested int ] + // + // The semantics of nested int when it comes to relations is that it is + // treated as integer known to be within a certain range, + // + // j0 \in [2, int64_t::max] + // + // allowing us to answer queries like j0 >= 1 (True), and j0 == 0 (False). + // This is a useful default range for the raggedness pattern of a jagged + // tensor (1) since sizes are non-negative, and (2) we need to get past 0/1 + // specialization checks. + // + // [ Indeterminate inequalities error out ] + // + // Given the semantic defined above, certain relations like j0 < 3 are thus + // indeterminable. In our impl today, evaluating such relations error + // + // It may seem convenient to just define indeterminate relations to return + // False, but the implementation we maintain in parallel using sympy does not + // allow this. + // + // Sympy only allows overriding of Ge. The other relations (Lt, Gt, Le) are, + // by consequence, all derived from Ge e.g., Lt(a, b) := !Ge(a, b). This + // would mean that means that if we define the indeterminate j0 >= 3 to be + // False, the also indeterminate j0 < 3 will be evaluated to be True! + // + // [ Coefficient are assumed positive ] + // + // For the purpose of computing inequalities, we consider the coefficient of + // the nested int to be a positive integer. + // + // Thus, no modifications are needed to the logic since + // j0 >= k implies coeff * j0 >= k + // + c10::SymNode eq(const c10::SymNode& other) override; + c10::SymNode ne(const c10::SymNode& other) override; + c10::SymNode ge(const c10::SymNode& other) override; + c10::SymNode gt(const c10::SymNode& other) override; + c10::SymNode lt(const c10::SymNode& other) override; + c10::SymNode le(const c10::SymNode& other) override; + c10::SymNode mul(const c10::SymNode& other) override; + + std::optional nested_int() override { + return val_; + } + + std::optional nested_int_coeff() override { + return coeff_; + } + + bool is_symbolic() override { + return false; + } + + c10::SymNode clone() override; + +#define DEFINE_BINARY_NOT_SUPPORTED(name) \ + c10::SymNode name(const c10::SymNode& other) override { \ + TORCH_CHECK(false, #name " not supported by NestedIntSymNode"); \ + } + + DEFINE_BINARY_NOT_SUPPORTED(add) + DEFINE_BINARY_NOT_SUPPORTED(sub) + DEFINE_BINARY_NOT_SUPPORTED(truediv) + DEFINE_BINARY_NOT_SUPPORTED(pow) + DEFINE_BINARY_NOT_SUPPORTED(floordiv) + DEFINE_BINARY_NOT_SUPPORTED(mod) + DEFINE_BINARY_NOT_SUPPORTED(sym_min) + DEFINE_BINARY_NOT_SUPPORTED(sym_max) + DEFINE_BINARY_NOT_SUPPORTED(sym_and) + DEFINE_BINARY_NOT_SUPPORTED(sym_or) + +#undef DEFINE_BINARY_NOT_SUPPORTED + +#define DEFINE_NOT_SUPPORTED(name) \ + c10::SymNode name() override { \ + TORCH_CHECK(false, #name " is not supported by NestedIntSymNode"); \ + } + + DEFINE_NOT_SUPPORTED(sym_not) + DEFINE_NOT_SUPPORTED(ceil) + DEFINE_NOT_SUPPORTED(floor) + DEFINE_NOT_SUPPORTED(neg) + DEFINE_NOT_SUPPORTED(sym_float) + +#undef DEFINE_NOT_SUPPORTED + + private: + int64_t val_; + int64_t coeff_; +}; + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h new file mode 100644 index 0000000000000000000000000000000000000000..0146c2801c1b93b6b8d7e8c5043ed22ffad1f449 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h @@ -0,0 +1,245 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// define constants like M_PI and C keywords for MSVC +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#include +#endif + + +#ifdef __CUDACC__ +#include +#endif + +#include +#include +#include +#include + +namespace at { + +// typedefs for holding vector data +namespace detail { + +typedef std::array UINT4; +typedef std::array UINT2; +typedef std::array DOUBLE2; +typedef std::array FLOAT2; + +} // namespace detail + +/** + * Note [Philox Engine implementation] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Originally implemented in PyTorch's fusion compiler + * Refer to: http://www.thesalmons.org/john/random123/papers/random123sc11.pdf + * for details regarding the engine. + * + * Note that currently this implementation of the philox engine is not used + * anywhere except for tests in cpu_generator_test.cpp. However, this engine + * will replace curandStatePhilox4_32_10_t in the future. + * + * The philox engine takes a seed value, a subsequeunce + * for starting the generation and an offset for the subsequence. + * Think of this engine as an algorithm producing a huge array. We are + * parallelizing this array by partitioning the huge array and assigning + * a thread index to each partition. In other words, each seed value + * (there are 2^64 possible seed values) gives a sub array of size + * 2^128 (each element in that array is a 128 bit number). Reasoning + * behind the array being of size 2^128 is, there are 2^64 possible + * thread index value and there is an array of size 2^64 for each of + * those thread index. Hence 2^64 * 2^64 = 2^128 for each seed value. + * + * In short, this generator can produce 2^64 (seed values) * 2^128 (number + * of elements in an array given by a seed value) = 2^192 values. + * + * Arguments: + * seed: Seed values could be any number from 0 to 2^64-1. + * subsequence: Subsequence is just the cuda thread indexing with: + * - blockIdx.x * blockDim.x + threadIdx.x + * offset: The offset variable in PhiloxEngine decides how many 128-bit + * random numbers to skip (i.e. how many groups of 4, 32-bit numbers to skip) + * and hence really decides the total number of randoms that can be achieved + * for the given subsequence. + */ + +class philox_engine { +public: + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + C10_HOST_DEVICE inline explicit philox_engine(uint64_t seed = 67280421310721, + uint64_t subsequence = 0, + uint64_t offset = 0) { + + reset_state(seed, subsequence); + incr_n(offset); + } + + C10_HOST_DEVICE inline void reset_state(uint64_t seed = 67280421310721, + uint64_t subsequence = 0) { + key_[0] = static_cast(seed); + key_[1] = static_cast(seed >> 32); + counter_ = detail::UINT4{}; + counter_[2] = static_cast(subsequence); + counter_[3] = static_cast(subsequence >> 32); + STATE = 0; + } + + /** + * Set the offset field of Philox Generator to the desired offset. + */ + C10_HOST_DEVICE inline void set_offset(uint64_t offset) { + counter_[0] = static_cast(offset); + counter_[1] = static_cast(offset >> 32); + } + + /** + * Gets the current offset of the Philox Generator. + */ + C10_HOST_DEVICE uint64_t get_offset() const { + uint64_t lo = static_cast(counter_[0]); + uint64_t hi = static_cast(counter_[1]) << 32; + return lo | hi; + } + + /** + * Produces a unique 32-bit pseudo random number on every invocation. Bookeeps state to avoid waste. + */ + C10_HOST_DEVICE inline uint32_t operator()(int32_t n_rounds = 10) { // 10 here to preserve back-compat behavior + if(STATE == 0) { + detail::UINT4 counter = counter_; + detail::UINT2 key = key_; + output_ = rand(counter, key, n_rounds); + incr(); + } + uint32_t ret = output_[static_cast(STATE)]; + STATE = (STATE + 1) & 3; + return ret; + } + + inline float randn(uint32_t n_rounds) { + #ifdef __CUDA_ARCH__ + AT_ASSERT(false, "Unsupported invocation of randn on CUDA"); + #endif + if(STATE == 0) { + detail::UINT4 counter = counter_; + detail::UINT2 key = key_; + output_ = rand(counter, key, n_rounds); + incr(); + } + // TODO(min-jean-cho) change to Polar method, a more efficient version of Box-Muller method + // TODO(voz) We use std:: below, and thus need a separate impl for CUDA. + float u1 = 1 - uint32_to_uniform_float(output_[0]); // uint32_to_uniform_float returns [0,1), we need (0,1] to avoid passing 0 to log. + float u2 = 1 - uint32_to_uniform_float(output_[1]); + return static_cast(std::sqrt(-2.0 * std::log(u1)) * std::cos(2.0 * M_PI * u2)); + } + + /** + * Function that Skips N 128 bit numbers in a subsequence + */ + C10_HOST_DEVICE inline void incr_n(uint64_t n) { + uint32_t nlo = static_cast(n); + uint32_t nhi = static_cast(n >> 32); + counter_[0] += nlo; + // if overflow in x has occurred, carry over to nhi + if (counter_[0] < nlo) { + nhi++; + // if overflow in nhi has occurred during carry over, + // propagate that overflow to y and exit to increment z + // otherwise return + counter_[1] += nhi; + if(nhi != 0) { + if (nhi <= counter_[1]) { + return; + } + } + } else { + // if overflow in y has occurred during addition, + // exit to increment z + // otherwise return + counter_[1] += nhi; + if (nhi <= counter_[1]) { + return; + } + } + if (++counter_[2]) + return; + ++counter_[3]; + } + + /** + * Function that Skips one 128 bit number in a subsequence + */ + C10_HOST_DEVICE inline void incr() { + if (++counter_[0]) + return; + if (++counter_[1]) + return; + if (++counter_[2]) { + return; + } + ++counter_[3]; + } + +private: + detail::UINT4 counter_; + detail::UINT4 output_; + detail::UINT2 key_; + uint32_t STATE; + + C10_HOST_DEVICE inline uint32_t mulhilo32(uint32_t a, uint32_t b, + uint32_t *result_high) { + #ifdef __CUDA_ARCH__ + *result_high = __umulhi(a, b); + return a*b; + #else + const uint64_t product = static_cast(a) * b; + *result_high = static_cast(product >> 32); + return static_cast(product); + #endif + } + + C10_HOST_DEVICE inline detail::UINT4 single_round(detail::UINT4 ctr, detail::UINT2 in_key) { + uint32_t hi0 = 0; + uint32_t hi1 = 0; + uint32_t lo0 = mulhilo32(kPhiloxSA, ctr[0], &hi0); + uint32_t lo1 = mulhilo32(kPhiloxSB, ctr[2], &hi1); + detail::UINT4 ret; + ret[0] = hi1 ^ ctr[1] ^ in_key[0]; + ret[1] = lo1; + ret[2] = hi0 ^ ctr[3] ^ in_key[1]; + ret[3] = lo0; + return ret; + } + + C10_HOST_DEVICE constexpr float uint32_to_uniform_float(uint32_t value) { + // maximum value such that `MAX_INT * scale < 1.0` (with float rounding) + constexpr float scale = 4.6566127342e-10; + return static_cast(value & 0x7FFFFFFF) * scale; + } + + + + C10_HOST_DEVICE inline detail::UINT4 rand(detail::UINT4& counter, detail::UINT2& key, uint32_t n_rounds) { + for (uint32_t round = 0; round < (n_rounds - 1); round++) { + counter = single_round(counter, key); + key[0] += (kPhilox10A); key[1] += (kPhilox10B); + } + return single_round(counter, key); + } + + + static constexpr uint32_t kPhilox10A = 0x9E3779B9; + static constexpr uint32_t kPhilox10B = 0xBB67AE85; + static constexpr uint32_t kPhiloxSA = 0xD2511F53; + static constexpr uint32_t kPhiloxSB = 0xCD9E8D57; +}; + +typedef philox_engine Philox4_32; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PythonFallbackKernel.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PythonFallbackKernel.h new file mode 100644 index 0000000000000000000000000000000000000000..c79644c56704f057444caa2e60d92a76d26f94bf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PythonFallbackKernel.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + + +namespace at::impl { + +struct TORCH_API RestorePythonTLSSnapshot { + RestorePythonTLSSnapshot(); + RestorePythonTLSSnapshot(RestorePythonTLSSnapshot&& other) = delete; + RestorePythonTLSSnapshot(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(RestorePythonTLSSnapshot&&) = delete; + ~RestorePythonTLSSnapshot(); + +private: + c10::impl::LocalDispatchKeySet saved_; + c10::impl::ForceDispatchKeyGuard guard_; +}; + + +// RAII guard to make working with the above TLS safer. +struct TORCH_API MaybeSetTLSOnEntryGuard { +public: + MaybeSetTLSOnEntryGuard(); + MaybeSetTLSOnEntryGuard(MaybeSetTLSOnEntryGuard&& other) = delete; + MaybeSetTLSOnEntryGuard(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(MaybeSetTLSOnEntryGuard&&) = delete; + ~MaybeSetTLSOnEntryGuard(); + +private: + bool value_set_; +}; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h new file mode 100644 index 0000000000000000000000000000000000000000..bbbc9b5308c5c12b6d71684d1a1c9d0047df990a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +// TODO: this can probably live in c10 + + +namespace at::impl { + +class TORCH_API PythonOpRegistrationTrampoline final { + static std::atomic interpreter_; + +public: + // Returns true if you successfully registered yourself (that means + // you are in the hot seat for doing the operator registrations!) + static bool registerInterpreter(c10::impl::PyInterpreter* /*interp*/); + + // Returns nullptr if no interpreter has been registered yet. + static c10::impl::PyInterpreter* getInterpreter(); +}; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/QuantizerBase.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/QuantizerBase.h new file mode 100644 index 0000000000000000000000000000000000000000..fbb5f92f0e2214090ce7570877b7cfb6ea27168d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/QuantizerBase.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace at { + +class Tensor; +struct QTensorImpl; +struct Quantizer; +using ConstQuantizerPtr = const c10::intrusive_ptr&; +using QuantizerPtr = c10::intrusive_ptr; + +/** + * Quantizer is the class for storing all the information + * that's necessary to perform quantize and dequantize + * operation. + * + * We might have different types of quantization schemes and this is + * the base class for all quantizers. + * + * QTensorImpl will hold a pointer to Quantizer so that we can support + * different quantization schemes on Tensor. + * + * For example, the most common quantization scheme, Affine Quantization, + * requires scale and zero_point as parameters, we'll store scale and zero_point + * inside the instance and we can use it to quantize a float Tensor or + * dequantize a quantized Tensor. + * + * When you add new types of leaf Quantizer class, please also + * make sure to add a corresponding QScheme enum since + * they should have one to one mapping. + * + * Note about intrusive_ptr: + * Quantized Tensor holds an intrusive_ptr to Quantizer, and multiple Tensor can + * share the same Quantizer. Quantizer should be immutable. + */ +struct TORCH_API Quantizer : public c10::intrusive_ptr_target { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ScalarType scalar_type_; + explicit Quantizer(ScalarType scalar_type) : scalar_type_(scalar_type) {} + ~Quantizer() override = default; + + // Copied from torch/csrc/jit/ir/scope.h + QuantizerPtr intrusive_from_this() { + c10::raw::intrusive_ptr::incref(this); // we are creating a new pointer + // from a raw `this` pointer + // so we need to bump the refcount + // to account for this ownership + return c10::intrusive_ptr::reclaim(this); + } + + /** + * Each concrete Quantizer type should have a unique QScheme type. + */ + virtual QScheme qscheme() const = 0; + + ScalarType scalar_type() const { + return scalar_type_; + } + + /** + * quantize a float Tensor into a quantized Tensor. + */ + virtual Tensor quantize(const Tensor& t) = 0; + + /** + * dequantize a quantized Tensor into a float Tensor. + */ + virtual Tensor dequantize(const Tensor& t) = 0; + + /** + * dequantize a quantized Tensor into a float Tensor, out= variant + */ + virtual Tensor& dequantize_out(Tensor& out, const Tensor& t) = 0; + + /** + * Compare against `other` for equality. + */ + virtual bool equalTo(QuantizerPtr other) const = 0; +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Range.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Range.h new file mode 100644 index 0000000000000000000000000000000000000000..8d857bd08d4204214c5e7e06d623fa48993ec597 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Range.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { + +struct Range { + Range(int64_t begin, int64_t end) + : begin(begin) + , end(end) {} + + int64_t size() const { return end - begin; } + + Range operator/(int64_t divisor) { + return Range(begin / divisor, end / divisor); + } + + int64_t begin; + int64_t end; +}; + +std::ostream& operator<<(std::ostream& out, const Range& range); + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Reduction.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Reduction.h new file mode 100644 index 0000000000000000000000000000000000000000..8f8cb7650bd48a58c4e79701853d8b00732bbb02 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Reduction.h @@ -0,0 +1,19 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +namespace at::Reduction { + +// NB: Keep this in sync with Reduction class in torch/nn/_reduction.py +// These constants control the reduction behavior of loss functions. +// Ideally, this would be a scoped enum, but jit doesn't support that +enum Reduction { + None, // Do not reduce + Mean, // (Possibly weighted) mean of losses + Sum, // Sum losses + END +}; +} // namespace at::Reduction + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Scalar.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Scalar.h new file mode 100644 index 0000000000000000000000000000000000000000..be3507a9784f176823a82e035c581dfde946efa5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Scalar.h @@ -0,0 +1,6 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ScalarType.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ScalarType.h new file mode 100644 index 0000000000000000000000000000000000000000..97945e1e57a437a0e46f2bbefad76e1f34280828 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ScalarType.h @@ -0,0 +1,6 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Tensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..cc8558a2bbe45f6d373f24d6a0e02e8795537240 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Tensor.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class TORCH_API OptionalTensorRef { + public: + OptionalTensorRef() = default; + + ~OptionalTensorRef() { + ref_.unsafeReleaseTensorImpl(); + } + + OptionalTensorRef(const TensorBase& src) + : ref_(Tensor::unsafe_borrow_t{}, src) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(src.defined()); + } + + OptionalTensorRef(const OptionalTensorRef& rhs) + : ref_(Tensor::unsafe_borrow_t{}, rhs.ref_) {} + + OptionalTensorRef(OptionalTensorRef&& rhs) = default; + OptionalTensorRef& operator=(OptionalTensorRef rhs) { + std::swap(ref_, rhs.ref_); + return *this; + } + + bool has_value() const { + return ref_.defined(); + } + + const Tensor& getTensorRef() const & { + return ref_; + } + + const Tensor& operator*() const & { + return ref_; + } + + const Tensor* operator->() const & { + return &ref_; + } + + operator bool() const { + return ref_.defined(); + } + + private: + Tensor ref_; +}; + +// Use to convert a TensorBase (that may be undefined) to an at::Tensor +// without bumping refcount. +class TORCH_API TensorRef { + public: + ~TensorRef() { + ref_.unsafeReleaseTensorImpl(); + } + + TensorRef(const TensorBase& src) + : ref_(Tensor::unsafe_borrow_t{}, src) {} + TensorRef(TensorRef&& other) = default; + TensorRef(const TensorRef&) = default; + TensorRef& operator=(const TensorRef&) = default; + TensorRef& operator=(TensorRef&&) = default; + + const Tensor& operator*() const & { + return ref_; + } + private: + Tensor ref_; +}; + +template +auto Tensor::register_hook(T&& hook) const -> Tensor::hook_return_void_t { + // Return the grad argument in case of a hook with void return type to have an + // std::function with Tensor return type + static_assert(std::is_same_v, + "Expected hook to return void"); + return _register_hook([fn=std::forward(hook)](const TensorBase& grad_base) { + TensorRef grad(grad_base); + fn(*grad); + return Tensor(); + }); +} + +template +auto Tensor::register_hook(T&& hook) const -> Tensor::hook_return_var_t { + return _register_hook([fn=std::forward(hook)](const TensorBase& grad_base) { + TensorRef grad(grad_base); + Tensor ret = fn(*grad); + return TensorBase(std::move(ret)); + }); +} + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorAccessor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorAccessor.h new file mode 100644 index 0000000000000000000000000000000000000000..4fc9c1e8a55cbf9edb187d808bf25f964db38d54 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorAccessor.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { + +using torch::headeronly::DefaultPtrTraits; +#if defined(__CUDACC__) || defined(__HIPCC__) + using torch::headeronly::RestrictPtrTraits; +#endif + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using TensorAccessorBase = torch::headeronly::detail::TensorAccessorBase; + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using TensorAccessor = torch::headeronly::detail::TensorAccessor; + +namespace detail { + +template +struct IndexBoundsCheck { + IndexBoundsCheck(index_t i) { + TORCH_CHECK_INDEX( + 0 <= i && i < index_t{N}, + "Index ", + i, + " is not within bounds of a tensor of dimension ", + N); + } +}; +} // namespace detail + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using GenericPackedTensorAccessorBase = torch::headeronly::detail::GenericPackedTensorAccessorBase, T, N, PtrTraits, index_t>; + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using GenericPackedTensorAccessor = torch::headeronly::detail::GenericPackedTensorAccessor, detail::IndexBoundsCheck, T, N, PtrTraits, index_t>; + +// Can't put this directly into the macro function args because of commas +#define AT_X GenericPackedTensorAccessor + +// Old name for `GenericPackedTensorAccessor` +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +C10_DEFINE_DEPRECATED_USING(PackedTensorAccessor, AT_X) + +#undef AT_X + +template class PtrTraits = DefaultPtrTraits> +using PackedTensorAccessor32 = GenericPackedTensorAccessor; + +template class PtrTraits = DefaultPtrTraits> +using PackedTensorAccessor64 = GenericPackedTensorAccessor; +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorBase.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorBase.h new file mode 100644 index 0000000000000000000000000000000000000000..3244b247a214a9aa086a17adb4b20db6c0afa295 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorBase.h @@ -0,0 +1,1098 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// See https://github.com/pytorch/pytorch/issues/161660 +// This compile flag is intended to be passed in to CppExtensions that rely on +// the stable ABI via the `extra_compile_args` argument. This is a stopgap +// solution to ensure that non-stable libtorch APIs are not used in the extension. +// The long term solution is to have a torch_stable target that excludes headers +// that are not in torch/stable or torch/headeronly. +// See test/cpp_extensions/torch_stable_test_extension/setup.py for an example +// of how this is used. +#ifdef TORCH_STABLE_ONLY +#error \ + "TensorBase.h should not be included when TORCH_STABLE_ONLY compile flag is passed" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace c10 { +class Scalar; +} + +namespace torch::autograd { + +struct Node; + +} // namespace torch::autograd + +namespace at { + +class Tensor; +class TensorBase; + +// Convert Tensor to TensorBase without any need to include Tensor.h +TORCH_API const TensorBase& get_tensor_base(const Tensor& t); + +namespace impl { +inline bool variable_excluded_from_dispatch() { +#ifdef C10_MOBILE + // Please read the comment in `VariableFallbackKernel.cpp` about the background of this change. + return true; +#else + return c10::impl::tls_local_dispatch_key_set().excluded_.isSupersetOf(c10::autograd_dispatch_keyset); +#endif +} + +} + +// NOTE: [Tensor vs. TensorBase] +// +// Tensor, being the central data structure in PyTorch, gets used and +// its header included almost everywhere. Unfortunately this means +// every time an operator signature is updated or changed in +// native_functions.yaml, you (and every other PyTorch developer) need +// to recompile all of ATen and its dependencies. +// +// TensorBase aims to break up these header dependencies, and improve +// incremental build times for all PyTorch developers. TensorBase +// represents a reference counted handle to TensorImpl, exactly the +// same as Tensor. However, TensorBase doesn't have code generated +// methods in its API and thus no dependence on native_functions.yaml. +// +// Usage tips +// ---------- +// - You can `#define TORCH_ASSERT_NO_OPERATORS` at the top of a .cpp +// or .cu file to ensure it has no header dependencies on +// native_functions.yaml (direct or indirect). +// - Tensor inherits from TensorBase, so functions taking +// `const TensorBase &` are callable with Tensor as well. +// - TensorBase can be converted to Tensor with `Tensor(tensor_base)`, +// but this requires a reference-count bump. OptionalTensorRef, on +// the other hand, can materialize a `const Tensor &` without +// touching the reference-count. +class TORCH_API TensorBase { + public: + struct unsafe_borrow_t { explicit unsafe_borrow_t() = default; }; + + protected: + // Create a Tensor with a +0 reference count. Special care must be + // taken to avoid decrementing this reference count at destruction + // time. Intended to support MaybeOwnedTraits. + explicit TensorBase(unsafe_borrow_t /*unused*/, const TensorBase& rhs) + : impl_(c10::intrusive_ptr(rhs.impl_.get(), c10::raw::DontIncreaseRefcount{})) {} + friend MaybeOwnedTraits; + + public: + TensorBase() = default; + // This constructor should not be used by end users and is an implementation + // detail invoked by autogenerated code. + explicit TensorBase( + c10::intrusive_ptr tensor_impl) + : impl_(std::move(tensor_impl)) { + TORCH_CHECK(impl_.get(), "TensorImpl with nullptr is not supported"); + } + TensorBase(const TensorBase&) = default; + TensorBase(TensorBase&&) noexcept = default; + ~TensorBase() noexcept = default; + + public: + // Creates a new wrapper from TensorImpl. Intentionally a free method because + // it should be used with care. Checks necessary invariants + static TensorBase wrap_tensor_impl( + c10::intrusive_ptr tensor_impl) { + TensorBase r(std::move(tensor_impl)); + r.enforce_invariants(); + return r; + } + + int64_t dim() const { + return impl_->dim(); + } + int64_t storage_offset() const { + return impl_->storage_offset(); + } + + TensorBase contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const { + if (is_contiguous_or_false(memory_format)) { + return *this; + } else { + return __dispatch_contiguous(memory_format); + } + } + + /// Should be used if *this can reasonably be expected to be contiguous and + /// performance is important. + /// Compared to contiguous, it saves a reference count + /// increment/decrement if *this is already contiguous, at the cost + /// in all cases of an extra pointer of stack usage, an extra branch + /// to access, and an extra branch at destruction time. + c10::MaybeOwned expect_contiguous( + MemoryFormat memory_format=MemoryFormat::Contiguous) const &; + + // Use .contiguous() instead. Trying to borrow from a prvalue + // will only lead to trouble and dangling references. + c10::MaybeOwned expect_contiguous( + MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete; + + const TensorBase& fill_(const c10::Scalar& scalar) const; + const TensorBase& zero_() const; + + TensorBase to(at::TensorOptions options={}, bool non_blocking=false, bool copy=false, std::optional memory_format=std::nullopt) const; + + bool is_complex() const { + return at::isComplexType(this->scalar_type()); + } + + bool is_floating_point() const { + return at::isFloatingType(this->scalar_type()); + } + + bool is_signed() const { + return at::isSignedType(this->scalar_type()); + } + + c10::SymInt sym_size(int64_t dim) const { + return impl_->sym_size(dim); + } + + c10::SymInt sym_stride(int64_t dim) const { + const auto sizes = this->sym_strides(); + const auto ndim = static_cast(sizes.size()); + // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping) + return sizes[c10::maybe_wrap_dim(dim, ndim, /*wrap_scalar=*/false)]; + + } + + int64_t size(int64_t dim) const { + return impl_->size(dim); + } + + int64_t stride(int64_t dim) const { + const auto strides = this->strides(); + const auto ndim = static_cast(strides.size()); + // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping) + return strides[c10::maybe_wrap_dim(dim, ndim, /*wrap_scalar=*/false)]; + } + + TensorImpl * unsafeGetTensorImpl() const { + return impl_.get(); + } + TensorImpl * unsafeReleaseTensorImpl() { + return impl_.release(); + } + const c10::intrusive_ptr& getIntrusivePtr() const { + return impl_; + } + + c10::intrusive_ptr unsafeReleaseIntrusivePtr() { + return std::move(impl_); + } + + bool defined() const { + return impl_; + } + + void reset() { + impl_.reset(); + } + +#if defined (_MSC_VER) + TensorBase& operator=(const TensorBase& x) & { + impl_ = x.impl_; + return *this; + }; + TensorBase& operator=(TensorBase&& x) & noexcept { + impl_ = std::move(x.impl_); + return *this; + } +#else + TensorBase& operator=(const TensorBase& x) & = default; + TensorBase& operator=(TensorBase&& x) & noexcept = default; +#endif + + // Ban assignment to rvalues, since at::Tensor (weirdly) performs a deep copy here + TensorBase& operator=(const TensorBase&) && = delete; + TensorBase& operator=(TensorBase&&) && noexcept = delete; + + bool is_same(const TensorBase& other) const noexcept { + return impl_ == other.impl_; + } + size_t use_count() const noexcept { + return impl_.use_count(); + } + size_t weak_use_count() const noexcept { + return impl_.weak_use_count(); + } + bool is_uniquely_owned() const noexcept { + return impl_.is_uniquely_owned(); + } + + std::string toString() const; + + IntArrayRef sizes() const { + return impl_->sizes(); + } + c10::SymIntArrayRef sym_sizes() const { + return impl_->sym_sizes(); + } + c10::SymIntArrayRef sym_strides() const { + return impl_->sym_strides(); + } + IntArrayRef strides() const { + return impl_->strides(); + } + // See impl::get_opt_names in ATen/NamedTensor.h for docs. + std::optional opt_names() const { + return impl::get_opt_names(unsafeGetTensorImpl()); + } + // See impl::get_names in ATen/NamedTensor.h for docs. + DimnameList names() const { + return impl::get_names(unsafeGetTensorImpl()); + } + int64_t ndimension() const { + return dim(); + } + + bool is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { + return impl_->is_contiguous(memory_format); + } + + // Like is_contiguous, but more dynamic shape-friendly. May return a symbolic representation of + // contiguity instead of SymTrue SymFalse, when results are data-dependent. + c10::SymBool sym_is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { + if (impl_->has_symbolic_sizes_strides()) { + return impl_->sym_is_contiguous(memory_format); + } + return impl_->is_contiguous(memory_format); + } + + // Like is_contiguous, but more dynamic shape-friendly. Can returns + // false instead of throwing data-dependent errors for tensors with unbacked + // sizes or strides. + bool is_contiguous_or_false(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { + if (impl_->has_symbolic_sizes_strides()) { + return impl_->sym_is_contiguous(memory_format).guard_or_false(__FILE__, __LINE__); + } + return impl_->is_contiguous(memory_format); + } + + bool is_non_overlapping_and_dense() const { + return impl_->is_non_overlapping_and_dense(); + } + + at::MemoryFormat suggest_memory_format( + bool channels_last_strides_exact_match = false) const { + // Setting channels_last_strides_exact_match to true forces function to + // check 0,1 - sized dimension strides. + if (layout() == at::kStrided) { + if (impl_->is_strides_like_channels_last()) { + if (!channels_last_strides_exact_match || + get_channels_last_strides_2d(sizes()) == strides()) { + return at::MemoryFormat::ChannelsLast; + } + } + else if (impl_->is_strides_like_channels_last_3d()) { + if (!channels_last_strides_exact_match || + get_channels_last_strides_3d(sizes()) == strides()) { + return at::MemoryFormat::ChannelsLast3d; + } + } + } + return at::MemoryFormat::Contiguous; + } + + // Total bytes consumed by the "view" of elements of the array. Does not + // include size of metadata. The number reported here does not necessarily + // correspond to the true physical memory consumed by a tensor; instead, + // it reports the memory the tensor would take *if* it were contiguous. + // Defined to be numel() * itemsize() + size_t nbytes() const { + TORCH_CHECK(layout () != at::kSparse, + "nbytes is not defined for sparse tensors. If you want the size of the constituent " \ + "tensors, add the nbytes of the indices and values. If you want the size of the " \ + "equivalent dense tensor, multiply numel() by element_size()"); + return impl_->numel() * impl_->itemsize(); + } + + c10::SymInt sym_nbytes() const { + TORCH_CHECK(layout () != at::kSparse, + "nbytes is not defined for sparse tensors. If you want the size of the constituent " \ + "tensors, add the nbytes of the indices and values. If you want the size of the " \ + "equivalent dense tensor, multiply numel() by element_size()"); + return impl_->sym_numel() * impl_->itemsize(); + } + + int64_t numel() const { + return impl_->numel(); + } + + c10::SymInt sym_numel() const { + return impl_->sym_numel(); + } + + c10::SymInt sym_storage_offset() const { + return impl_->sym_storage_offset(); + } + + // Length of one array element in bytes. This is the traditional + // Numpy naming. + size_t itemsize() const { + return impl_->itemsize(); + } + + // Same as itemsize(). This is the PyTorch naming. + int64_t element_size() const { + return static_cast(impl_->itemsize()); + } + + DispatchKeySet key_set() const { + return impl_->key_set(); + } + ScalarType scalar_type() const { + return typeMetaToScalarType(impl_->dtype()); + } + bool has_storage() const { + return defined() && impl_->has_storage(); + } + const Storage& storage() const { + return impl_->storage(); + } + bool is_alias_of(const at::TensorBase& other) const{ + return impl_->storage().is_alias_of(other.storage()); + } + + // Move the storage backend to shm based + // to enable memory sharing across processes. + // + // NB1: the ideal behavior of this API still requires further discussion + // but for now we are inclined to keep it consistent with existing THP behavior + // https://github.com/pytorch/pytorch/blob/4dca9bde0552afc67b5b74f4a0696fe6055709c4/torch/storage.py#L196-L212 + // so we don't assert on anything here and rely on caller knowing + // what it's doing. + // + // NB2: this currently provides Linux fd based shm support only + // to simplify the storage lifetime management logic in ATen + // and similarly for now we are not adding support for file system based + // shm support like in THP due to additional GC manager support needed + // to prevent leaks. + // As such, calling this from non supported systems (e.g. Windows) would fail. + void share_memory_() { + at::share_memory_(*this); + } + + inline bool _is_zerotensor() const { + return impl_->_is_zerotensor(); + } + + inline void _set_zero(bool zero) const { + impl_->_set_zero(zero); + } + + inline bool is_conj() const { + return impl_->is_conj(); + } + + // sets the conjugate bit of a tensor. + // NOTE: Conjugate bit is supposed to be a read-only field. Only change this, if you are sure + // that's what you want. Changing this might lead to incorrect behavior since conjugation is + // a lazy operation and we rely on this bit to determine if a conjugation needs to be materialized. + inline void _set_conj(bool conjugate) const { + impl_->_set_conj(conjugate); + } + + inline bool is_neg() const { + return impl_->is_neg(); + } + + // sets the negative bit of a tensor. + // NOTE: Negative bit is supposed to be a read-only field. Only change this, if you are sure + // that's what you want. Changing this might lead to incorrect behavior since we rely on this + // bit to determine if a negation needs to be materialized. + inline void _set_neg(bool negative) const { + impl_->_set_neg(negative); + } + + /// Returns a `Tensor`'s layout. + Layout layout() const { + return impl_->layout(); + } + + /// Returns a `Tensor`'s dtype (`TypeMeta`). + caffe2::TypeMeta dtype() const { + return impl_->dtype(); + } + + /// Returns a `Tensor`'s device. + inline Device device() const { + return impl_->device(); + } + + /// Returns a `Tensor`'s device index. + DeviceIndex get_device() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->get_device(); + } + + /// Returns if a `Tensor` has CPU backend. + bool is_cpu() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_cpu(); + } + + /// Returns if a `Tensor` has CUDA backend. + bool is_cuda() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_cuda(); + } + + /// Returns if a `Tensor` has IPU backend. + bool is_ipu() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_ipu(); + } + + /// Returns if a `Tensor` has XPU backend. + bool is_xpu() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_xpu(); + } + + /// Returns if a `Tensor` has XLA backend. + bool is_xla() const { + return impl_->is_xla(); + } + + /// Returns if a `Tensor` has MTIA backend. + bool is_mtia() const { + return impl_->is_mtia(); + } + + /// Returns if a `Tensor` has HPU backend. + bool is_hpu() const { + return impl_->is_hpu(); + } + + /// Returns if a `Tensor` has Lazy backend. + bool is_lazy() const { + return impl_->is_lazy(); + } + + /// Returns if a `Tensor` has HIP backend. + bool is_hip() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_hip(); + } + + /// Returns if a `Tensor` has VE backend. + bool is_ve() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_ve(); + } + + /// Returns if a `Tensor` has PrivateUse1 backend. + bool is_privateuseone() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_privateuseone(); + } + + /// Returns if a `Tensor` has sparse backend. + bool is_sparse() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_sparse(); + } + + /// Returns is a `Tensor` has a sparse CSR backend. + bool is_sparse_csr() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_sparse_csr(); + } + + /// Returns if a `Tensor` is mkldnn tensor. + bool is_mkldnn() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_mkldnn(); + } + + /// Returns if a `Tensor` is mps tensor. + bool is_mps() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_mps(); + } + + /// Returns if a `Tensor` is maia tensor. + bool is_maia() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_maia(); + } + + /// Returns if a `Tensor` is vulkan tensor. + bool is_vulkan() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_vulkan(); + } + + /// Returns if a `Tensor` is metal tensor. + bool is_metal() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_metal(); + } + + /// Returns if a `Tensor` has quantized backend. + bool is_quantized() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_quantized(); + } + + /// Returns if a `Tensor` is a meta tensor. Meta tensors can + /// also have other designations. + bool is_meta() const { + return impl_->is_meta(); + } + + /// Returns if a `Tensor` is an inference tensor. + bool is_inference() const { + return impl_->is_inference(); + } + + // Returns if a `Tensor` is a NestedTensor. + bool is_nested() const { + return impl_->is_nested(); + } + + /// If a tensor is a quantized tensor, returns its quantizer + /// TODO: it's not in native_functions.yaml yet as it's not exposed to python + QuantizerPtr quantizer() const; + + /// Returns if a `Tensor` has any dimension names + bool has_names() const { + // If a user is using unnamed tensors, then we can short-circuit right here. + // Otherwise, impl::has_names attempts to retrieve names. + if (!impl_->has_named_tensor_meta()) { + return false; + } + return impl::has_names(unsafeGetTensorImpl()); + } + + /// Returns a `Tensor`'s dimension names data structure + const NamedTensorMeta* get_named_tensor_meta() const { + return static_cast(impl_->named_tensor_meta()); + } + + NamedTensorMeta* get_named_tensor_meta() { + return static_cast(impl_->named_tensor_meta()); + } + + /// Returns the `TensorOptions` corresponding to this `Tensor`. Defined in + /// TensorOptions.h. + TensorOptions options() const { + return TensorOptions().dtype(dtype()) + .device(device()) + .layout(layout()); + } + + const void* const_data_ptr() const { + return this->unsafeGetTensorImpl()->data(); + } + + void* mutable_data_ptr() const { + return this->unsafeGetTensorImpl()->mutable_data(); + } + + // TODO(#97856) Make this return a const pointer. This currently + // returns a non-const pointer because of the large + // number of clients that we still want to audit before + // migrating to mutable_data_ptr(). + void* data_ptr() const { + return mutable_data_ptr(); + } + + template , int> = 0> + const T* const_data_ptr() const; + + template , int> = 0> + const std::remove_const_t* const_data_ptr() const; + + template + T* mutable_data_ptr() const; + + // Legacy interface during the migration to indicate that a callsite + // has not been audited for mutability. + // + // Do not add new uses of this, use const_data_ptr() if possible, + // mutable_data_ptr() otherwise. + // + // TODO(#97856) Make this return a const pointer. This is currently + // const because of the vast number of clients that + // rely on this. + template + T* data_ptr() const; + + // Purposely not defined here to avoid inlining + void print() const; + + // Return a `TensorAccessor` for CPU `Tensor`s. You have to specify scalar type and + // dimension. + template + TensorAccessor accessor() const& { + static_assert(N > 0, "accessor is used for indexing tensor, for scalars use *data_ptr()"); + TORCH_CHECK(dim() == N, "TensorAccessor expected ", N, " dims but tensor has ", dim()); + T* ptr = nullptr; + if constexpr (std::is_const_v) { + ptr = const_data_ptr(); + } else { + ptr = mutable_data_ptr(); + } + return TensorAccessor(ptr,sizes().data(),strides().data()); + } + template + TensorAccessor accessor() && = delete; + + // Return a `GenericPackedTensorAccessor` for CUDA `Tensor`s. You have to specify scalar type and + // dimension. You can optionally specify RestrictPtrTraits as a template parameter to + // cast the data pointer to a __restrict__ pointer. + // In order to use this, your CUDA kernel has to take a corresponding GenericPackedTensorAccessor + // as an argument. + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + GenericPackedTensorAccessor generic_packed_accessor() const& { + static_assert(N > 0, "accessor is used for indexing tensor, for scalars use *data_ptr()"); + TORCH_CHECK(dim() == N, "TensorAccessor expected ", N, " dims but tensor has ", dim()); + T* ptr = nullptr; + if constexpr (std::is_const_v) { + ptr = const_data_ptr(); + } else { + ptr = mutable_data_ptr(); + } + return GenericPackedTensorAccessor(static_cast::PtrType>(ptr),sizes().data(),strides().data()); + } + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + GenericPackedTensorAccessor generic_packed_accessor() && = delete; + + template class PtrTraits = DefaultPtrTraits> + PackedTensorAccessor32 packed_accessor32() const& { + TORCH_CHECK( + impl_->numel() <= + static_cast(std::numeric_limits::max()), + "numel needs to be smaller than int32_t max; otherwise, please use packed_accessor64"); + return generic_packed_accessor(); + } + template class PtrTraits = DefaultPtrTraits> + PackedTensorAccessor32 packed_accessor32() && = delete; + + template class PtrTraits = DefaultPtrTraits> + PackedTensorAccessor64 packed_accessor64() const& { + return generic_packed_accessor(); + } + template class PtrTraits = DefaultPtrTraits> + PackedTensorAccessor64 packed_accessor64() && = delete; + + // ~~~~~ Autograd API ~~~~~ + + /// \fn bool is_leaf() const; + /// + /// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention. + /// + /// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were + /// created by the user. This means that they are not the result of an operation and so + /// `grad_fn()` is `nullptr`. + /// + /// Only leaf Tensors will have their `grad()` populated during a call to `backward()`. + /// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`. + /// + /// Example: + /// @code + /// auto a = torch::rand(10, torch::requires_grad()); + /// std::cout << a.is_leaf() << std::endl; // prints `true` + /// + /// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA); + /// std::cout << b.is_leaf() << std::endl; // prints `false` + /// // b was created by the operation that cast a cpu Tensor into a cuda Tensor + /// + /// auto c = torch::rand(10, torch::requires_grad()) + 2; + /// std::cout << c.is_leaf() << std::endl; // prints `false` + /// // c was created by the addition operation + /// + /// auto d = torch::rand(10).cuda(); + /// std::cout << d.is_leaf() << std::endl; // prints `true` + /// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine) + /// + /// auto e = torch::rand(10).cuda().requires_grad_(); + /// std::cout << e.is_leaf() << std::endl; // prints `true` + /// // e requires gradients and has no operations creating it + /// + /// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true)); + /// std::cout << f.is_leaf() << std::endl; // prints `true` + /// // f requires grad, has no operation creating it + /// @endcode + + /// \fn void backward(const Tensor & gradient={}, std::optional retain_graph=std::nullopt, bool create_graph=false, std::optional inputs=std::nullopt) const; + /// + /// Computes the gradient of current tensor with respect to graph leaves. + /// + /// The graph is differentiated using the chain rule. If the tensor is + /// non-scalar (i.e. its data has more than one element) and requires + /// gradient, the function additionally requires specifying ``gradient``. + /// It should be a tensor of matching type and location, that contains + /// the gradient of the differentiated function w.r.t. this Tensor. + /// + /// This function accumulates gradients in the leaves - you might need to + /// zero them before calling it. + /// + /// \param gradient Gradient w.r.t. the + /// tensor. If it is a tensor, it will be automatically converted + /// to a Tensor that does not require grad unless ``create_graph`` is True. + /// None values can be specified for scalar Tensors or ones that + /// don't require grad. If a None value would be acceptable then + /// this argument is optional. + /// \param retain_graph If ``false``, the graph used to compute + /// the grads will be freed. Note that in nearly all cases setting + /// this option to True is not needed and often can be worked around + /// in a much more efficient way. Defaults to the value of + /// ``create_graph``. + /// \param create_graph If ``true``, graph of the derivative will + /// be constructed, allowing to compute higher order derivative + /// products. Defaults to ``false``. + /// \param inputs Inputs w.r.t. which the gradient will be accumulated into + /// ``at::Tensor::grad``. All other Tensors will be ignored. If not + /// provided, the gradient is accumulated into all the leaf Tensors + /// that were used to compute the current tensor. + /// When inputs are provided and a given input is not a leaf, + /// the current implementation will call its grad_fn (even though it is not strictly needed to get this gradients). + /// It is an implementation detail on which the user should not rely. + /// See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details. + + /// \fn Tensor detach() const; + /// + /// Returns a new Tensor, detached from the current graph. + /// The result will never require gradient. + + /// \fn Tensor & detach_() const; + /// + /// Detaches the Tensor from the graph that created it, making it a leaf. + /// Views cannot be detached in-place. + + /// \fn void retain_grad() const; + /// + /// Enables this Tensor to have their :attr:`grad` populated during + /// :func:`backward`. This is a no-op for leaf tensors. + + /// \fn bool retains_grad() const; + /// + /// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be + /// populated during :func:`backward`, ``false`` otherwise. + + const TensorBase& set_requires_grad(bool requires_grad) const { + impl_->set_requires_grad(requires_grad); + return *this; + } + bool requires_grad() const { + return impl_->requires_grad(); + } + + // The Forward AD API functions below are low level and are not to be used by end + // users who should use the API provided in torch/csrc/autograd.h + + /// This function returns the forward gradient for this Tensor at the given level. + const Tensor& _fw_grad(uint64_t level) const { + return impl_->_fw_grad(level, *this); + } + + /// This function can be used to set the value of the forward grad. + /// Note that the given new_grad might not be used directly if it has different + /// metadata (size/stride/storage offset) compared to this Tensor. In that case, + /// new_grad content will be copied into a new Tensor + void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const { + impl_->_set_fw_grad(new_grad, *this, level, is_inplace_op); + } + + /// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended + /// to be used from functions that need to access the `Variable`'s equivalent `Tensor` + /// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`). + /// + /// One notable difference with the legacy `.data()` function is that changes to the + /// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset) + /// will not update the original `Variable`, due to the fact that this function + /// shallow-copies the `Variable`'s underlying TensorImpl. + at::TensorBase tensor_data() const; + + /// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data` + /// in Python, which create a new `Variable` that shares the same storage and + /// tensor metadata with the original `Variable`, but with a completely new + /// autograd history. + /// + /// NOTE: If we change the tensor metadata (e.g. sizes / strides / + /// storage / storage_offset) of a variable created from `var.variable_data()`, those + /// changes will not update the original variable `var`. In `.variable_data()`, we set + /// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal, + /// in order to prevent users from changing metadata of `var.variable_data()` + /// and expecting the original variable `var` to also be updated. + at::TensorBase variable_data() const; + + // Gradient Node and Edges + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /// Gets the gradient function of the `Variable`. If this is a leaf variable, + /// the pointer returned will be null. + /// + /// For View Variables: + /// Gets the up-to-date grad_fn. If the shared data or base was modified, we + /// re-create the grad_fn to express the up-to-date view relationship between + /// this and the base Variable. + const std::shared_ptr& grad_fn() const; + + // Hooks + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + template + using hook_return_void_t = std::enable_if_t>, unsigned>; + template + using hook_return_var_t = std::enable_if_t, TensorBase>, unsigned>; + + /// Registers a backward hook. + /// + /// The hook will be called every time a gradient with respect to the Tensor is computed. + /// The hook should have one of the following signature: + /// ``` + /// hook(TensorBase grad) -> TensorBase + /// ``` + /// ``` + /// hook(TensorBase grad) -> void + /// ``` + /// The hook should not modify its argument, but it can optionally return a new gradient + /// which will be used in place of `grad`. + /// + /// This function returns the index of the hook in the list which can be used to remove hook. + /// + /// Example: + /// @code + /// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad()); + /// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient + /// v.backward(torch::tensor({1., 2., 3.})); + /// // This prints: + /// // ``` + /// // 2 + /// // 4 + /// // 6 + /// // [ CPUFloatType{3} ] + /// // ``` + /// std::cout << v.grad() << std::endl; + /// v.remove_hook(h); // removes the hook + /// @endcode + template + hook_return_void_t register_hook(T&& hook) const; + template + hook_return_var_t register_hook(T&& hook) const; + +protected: + unsigned _register_hook(std::function hook) const; + +public: + + /// Remove hook at given position + void remove_hook(unsigned pos) const; + + // Variable methods + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + bool is_leaf() const; + + int64_t output_nr() const; + + void set_data(const TensorBase & new_data) const; + + TensorBase data() const; + + int64_t _version() const; + + void retain_grad() const; + + bool retains_grad() const; + + const TensorBase& requires_grad_(bool _requires_grad=true) const; + + std::optional grad_dtype() const; + + void set_grad_dtype(const std::optional& grad_dtype) const; + + // View Variables + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /// Returns true if this `Variable` is a view of another `Variable`. + bool is_view() const; + + /// Returns the `Variable` that this `Variable` is a view of. If this + /// `Variable` is not a view, throw a `std::runtime_error`. + const TensorBase& _base() const; + + // Miscellaneous + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + const std::string& name() const; + +protected: + void enforce_invariants(); + c10::intrusive_ptr impl_; + +private: + TensorBase __dispatch_contiguous(c10::MemoryFormat /*memory_format*/) const; +}; + +inline DeviceIndex get_device(const TensorBase& self) { + return self.get_device(); +} + +template +auto TensorBase::register_hook(T&& hook) const -> TensorBase::hook_return_void_t { + // Return the grad argument in case of a hook with void return type to have an + // std::function with Tensor return type + static_assert(std::is_same_v, + "Expected hook to return void"); + return _register_hook([fn=std::forward(hook)](const TensorBase& grad) { + fn(grad); + return TensorBase(); + }); +} + +template +auto TensorBase::register_hook(T&& hook) const -> TensorBase::hook_return_var_t { + return _register_hook(std::forward(hook)); +} + +namespace detail { +// Helper creator for Tensor class which doesn't requires the users to pass +// in an intrusive_ptr instead it just converts the argument passed to +// requested intrusive_ptr type. +template +TensorBase make_tensor_base(Args&&... args) { + return TensorBase(c10::make_intrusive(std::forward(args)...)); +} + +} // namespace detail + +inline DispatchKey legacyExtractDispatchKey(const TensorBase& t) { + return legacyExtractDispatchKey(t.key_set()); +} + +} // namespace at + +namespace c10 { +template <> +struct MaybeOwnedTraits { + using owned_type = at::TensorBase; + using borrow_type = at::TensorBase; + + static borrow_type createBorrow(const owned_type& from) { + // NOTE: this can be implemented without the special + // unsafe_borrow_t Tensor constructor as + // + // return borrow_type(c10::intrusive_ptr::reclaim(from.unsafeGetTensorImpl())); + // + // but that hurts inlining due to the nullptr check in the + // Tensor(c10::intrusive_ptr<...>) constructor. We already know + // that from.impl_ isn't null because from is a valid Tensor, so + // we needn't do the check again. (using __builtin_assume can + // avoid this, but wouldn't be portable to MSVC.) + return borrow_type(borrow_type::unsafe_borrow_t{}, from); + } + + static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) { + lhs.unsafeReleaseTensorImpl(); + // See above note: this can be implemented with public API + // similarly to createBorrow(), but that would hurt inlining. + lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs); + } + + static void destroyBorrow(borrow_type& toDestroy) { + toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0. + } + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return &borrow; + } + + static bool debugBorrowIsValid(const borrow_type& /*borrow*/) { + return true; + } +}; + +template <> +struct ExclusivelyOwnedTraits : public c10::ExclusivelyOwnedTensorTraits {}; +} // namespace c10 + +namespace at { + +inline c10::MaybeOwned borrow_from_optional_tensor( + const std::optional& opt) { + return opt.has_value() + ? c10::MaybeOwned::borrowed(*opt) + : c10::MaybeOwned::owned(std::in_place); +} + +inline c10::MaybeOwned TensorBase::expect_contiguous(MemoryFormat memory_format) const & { + if (is_contiguous(memory_format)) { + return c10::MaybeOwned::borrowed(*this); + } else { + return c10::MaybeOwned::owned(__dispatch_contiguous(memory_format)); + } +} + +namespace symint { + +template +using enable_if_symint = std::enable_if_t>; +template +using enable_if_int = std::enable_if_t>; + +template > +c10::SymIntArrayRef sizes(const TensorBase& t) { return t.sym_sizes(); } +template > +IntArrayRef sizes(const TensorBase& t) { return t.sizes(); } + +template > +c10::SymInt size(const TensorBase& t, int64_t dim) { return t.sym_size(dim); } +template > +int64_t size(const TensorBase& t, int64_t dim) { return t.size(dim); } + +template > +c10::SymIntArrayRef strides(const TensorBase& t) { return t.sym_strides(); } +template > +IntArrayRef strides(const TensorBase& t) { return t.strides(); } + +template > +c10::SymInt numel(const TensorBase& t) { return t.sym_numel(); } +template > +int64_t numel(const TensorBase& t) { return t.numel(); } + +} // namespace symint + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorBody.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorBody.h new file mode 100644 index 0000000000000000000000000000000000000000..f67ce6fbfbcfce68be22d95d2758ad28799a2c72 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TensorBody.h @@ -0,0 +1,5799 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef TORCH_ASSERT_NO_OPERATORS +#error This change adds a dependency on native_functions.yaml, \ + meaning the file will need to be re-compiled every time an operator \ + is changed or added. Consider if your change would be better placed in \ + another file, or if a more specific header might achieve the same goal. \ + See NOTE: [Tensor vs. TensorBase] +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include + +namespace c10{ +template class List; +template class IListRef; +} +namespace at { +struct Generator; +struct Type; +class DeprecatedTypeProperties; +class Tensor; +} // namespace at +namespace at { +namespace indexing { +struct TensorIndex; +} // namespace indexing +} // namespace at + +namespace torch { namespace autograd { + +struct Node; + +}} // namespace torch::autograd + +namespace at { + +class OptionalTensorRef; +class TensorRef; +class Tensor; +using TensorList = ArrayRef; +using ITensorList = c10::IListRef; + +using Stream = c10::Stream; + +// Tensor is a "generic" object holding a pointer to the underlying TensorImpl object, which +// has an embedded reference count. In this way, Tensor is similar to boost::intrusive_ptr. +// +// For example: +// +// void func(Tensor a) { +// Tensor b = a; +// ... +// } +// +// In this example, when we say Tensor b = a, we are creating a new object that points to the +// same underlying TensorImpl, and bumps its reference count. When b goes out of scope, the +// destructor decrements the reference count by calling release() on the TensorImpl it points to. +// The existing constructors, operator overloads, etc. take care to implement the correct semantics. +// +// Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and +// special care must be taken to handle this. +class TORCH_API Tensor: public TensorBase { + protected: + // Create a Tensor with a +0 reference count. Special care must be + // taken to avoid decrementing this reference count at destruction + // time. Intended to support MaybeOwnedTraits. + explicit Tensor(unsafe_borrow_t, const TensorBase& rhs): TensorBase(unsafe_borrow_t{}, rhs) {} + friend MaybeOwnedTraits; + friend OptionalTensorRef; + friend TensorRef; + + public: + Tensor() = default; + // This constructor should not be used by end users and is an implementation + // detail invoked by autogenerated code. + explicit Tensor( + c10::intrusive_ptr tensor_impl) + : TensorBase(std::move(tensor_impl)) {} + Tensor(const Tensor &tensor) = default; + Tensor(Tensor &&tensor) = default; + + // Implicitly move-constructible from TensorBase, but must be explicit to increase refcount + explicit Tensor(const TensorBase &base): TensorBase(base) {} + /*implicit*/ Tensor(TensorBase &&base): TensorBase(std::move(base)) {} + + // Creates a new wrapper from TensorImpl. Intentionally a free method because + // it should be used with care. Checks necessary invariants + static Tensor wrap_tensor_impl( + c10::intrusive_ptr tensor_impl) { + return TensorBase::wrap_tensor_impl(std::move(tensor_impl)); + } + + Tensor contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const { + return TensorBase::contiguous(memory_format); + } + + Tensor conj() const { + if (!this->is_complex()) { + return *this; + } + + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") + switch (this->layout()) { + case at::kSparse: + case at::kSparseCsr: + case at::kSparseCsc: + case at::kSparseBsr: + case at::kSparseBsc: + return this->conj_physical(); + default: + return this->_conj(); + } + C10_DIAGNOSTIC_POP() + } + + // Aliased by Dimname overloads, so need explicit using + using TensorBase::size; + using TensorBase::sym_size; + using TensorBase::stride; + + /// Should be used if *this can reasonably be expected to be contiguous and + /// performance is important. + /// Compared to contiguous, it saves a reference count + /// increment/decrement if *this is already contiguous, at the cost + /// in all cases of an extra pointer of stack usage, an extra branch + /// to access, and an extra branch at destruction time. + c10::MaybeOwned expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const &; + + // Use .contiguous() instead. Trying to borrow from a prvalue Tensor + // will only lead to trouble and dangling references. + c10::MaybeOwned expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete; + + // The following overloads are very intriguing. Consider the following + // program: + // + // x[1] = 3; + // + // We would expect that the first entry of x is written to 3. But how can we + // actually achieve this? x[1] evaluates to a tensor... + // + // The answer is, using a ref-qualifier. x[1] is an rvalue, which cannot be + // (profitably) assigned to in the traditional sense, so we overload + // assignment to mean, "Actually, copy 3 into the tensor data." This is done + // with an rvalue-reference ref-qualified overload (the methods with && at the + // end of their type.) + // + // There's one more fly in the ointment: We also want + // + // Tensor x = y; + // + // to work, and we want it NOT to copy. So we need a traditional operator= + // overload. But we MUST specify a mutable lvalue ref-qualifier, to + // disambiguate the traditional overload from the rvalue-reference + // ref-qualified overload. Otherwise, it will be ambiguous, because + // a non ref-qualified method is eligible for all situations. + + // Unfortunately, we have to write these constructors out manually + // to work around an MSVC bug: + // error C2580: 'at::Tensor &at::Tensor::operator =(const at::Tensor &) &': + // multiple versions of a defaulted special member functions are not allowed + // Tensor& operator=(const Tensor&) & = default; + // Tensor& operator=(Tensor&&) & = default; + + // Also MSVC will wrongly issue the following warning with the aforementioned fix + // warning C4522: 'at::Tensor': multiple assignment operators specified + // Let's just skip the warning. + // + // TODO: temporarily disabled + + Tensor& operator=(const TensorBase& x) & noexcept { + impl_ = x.getIntrusivePtr(); + return *this; + } + Tensor& operator=(TensorBase&& x) & noexcept { + impl_ = x.unsafeReleaseIntrusivePtr(); + return *this; + } + + Tensor& operator=(const Tensor &x) & noexcept { + return operator=(static_cast(x)); + } + Tensor& operator=(Tensor &&x) & noexcept { + return operator=(static_cast(x)); + } + + Tensor& operator=(const Scalar &v) && { + return fill_(v); + } + Tensor& operator=(const Tensor &rhs) && { + return copy_(rhs); + } + Tensor& operator=(Tensor&& rhs) && { + return copy_(rhs); + } + + C10_DEPRECATED_MESSAGE("Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device().") + DeprecatedTypeProperties & type() const { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + dispatchKeyToBackend(legacyExtractDispatchKey(key_set())), + scalar_type()); + } + + Tensor toType(ScalarType t) const { + return to(options().dtype(t), /*non_blocking*/ false, /*copy*/ false); + } + + // TODO: Deprecate me + Tensor toBackend(Backend b) const { + return to(options().device(backendToDeviceType(b)).layout(layout_from_backend(b)), /*non_blocking*/ false, /*copy*/ false); + } + + C10_DEPRECATED_MESSAGE("Tensor.is_variable() is deprecated; everything is a variable now. (If you want to assert that variable has been appropriately handled already, use at::impl::variable_excluded_from_dispatch())") + bool is_variable() const noexcept { + return !at::impl::variable_excluded_from_dispatch(); + } + + template + C10_DEPRECATED_MESSAGE("Tensor.data() is deprecated. Please use Tensor.data_ptr() instead.") + T * data() const { + return data_ptr(); + } + + template + T item() const; + + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead") + GenericPackedTensorAccessor packed_accessor() const & { + return generic_packed_accessor(); + } + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead") + GenericPackedTensorAccessor packed_accessor() && = delete; + + Tensor operator~() const { + return bitwise_not(); + } + Tensor operator-() const { + return neg(); + } + Tensor& operator+=(const Tensor & other) { + return add_(other); + } + Tensor& operator+=(const Scalar & other) { + return add_(other); + } + Tensor& operator-=(const Tensor & other) { + return sub_(other); + } + Tensor& operator-=(const Scalar & other) { + return sub_(other); + } + Tensor& operator*=(const Tensor & other) { + return mul_(other); + } + Tensor& operator*=(const Scalar & other) { + return mul_(other); + } + Tensor& operator/=(const Tensor & other) { + return div_(other); + } + Tensor& operator/=(const Scalar & other) { + return div_(other); + } + Tensor& operator&=(const Tensor & other) { + return bitwise_and_(other); + } + Tensor& operator|=(const Tensor & other) { + return bitwise_or_(other); + } + Tensor& operator^=(const Tensor & other) { + return bitwise_xor_(other); + } + Tensor operator[](const Scalar & index) const { + if (!index.isIntegral(false)) { + TORCH_CHECK_INDEX(false, "Can only index tensors with integral scalars"); + } + return this->operator[](index.toLong()); + } + Tensor operator[](const Tensor & index) const { + // These properties are checked in the Scalar constructor, but we already + // check them here to provide more useful diagnostics for the user. + if (!index.defined()) { + TORCH_CHECK_INDEX(false, "Can only index with tensors that are defined"); + } + if (index.dim() != 0) { + TORCH_CHECK_INDEX(false, + "Can only index with tensors that are scalars (zero-dim)"); + } + // The Scalar(Tensor) constructor is explicit, so we need to call it. + return this->operator[](index.item()); + } + Tensor operator[](int64_t index) const { + return select(0, index); + } + + Tensor index(ArrayRef indices) const; + Tensor index(std::initializer_list indices) const; + + Tensor & index_put_(ArrayRef indices, Tensor const & rhs); + Tensor & index_put_(ArrayRef indices, const Scalar& v); + Tensor & index_put_(std::initializer_list indices, Tensor const & rhs); + Tensor & index_put_(std::initializer_list indices, const Scalar& v); + + Tensor cpu() const { + return to(options().device(c10::DeviceType::CPU), /*non_blocking*/ false, /*copy*/ false); + } + + // TODO: The Python version also accepts arguments + Tensor cuda() const { + return to(options().device(c10::DeviceType::CUDA), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor hip() const { + return to(options().device(c10::DeviceType::HIP), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor ve() const { + return to(options().device(c10::DeviceType::VE), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor vulkan() const { + return to(options().device(c10::DeviceType::Vulkan), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor metal() const { + return to(options().device(c10::DeviceType::Metal), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor meta() const { + return to(options().device(c10::DeviceType::Meta), /*non_blocking*/ false, /*copy*/ false); + } + + // ~~~~~ Autograd API ~~~~~ + + /// \fn bool is_leaf() const; + /// + /// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention. + /// + /// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were + /// created by the user. This means that they are not the result of an operation and so + /// `grad_fn()` is `nullptr`. + /// + /// Only leaf Tensors will have their `grad()` populated during a call to `backward()`. + /// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`. + /// + /// Example: + /// @code + /// auto a = torch::rand(10, torch::requires_grad()); + /// std::cout << a.is_leaf() << std::endl; // prints `true` + /// + /// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA); + /// std::cout << b.is_leaf() << std::endl; // prints `false` + /// // b was created by the operation that cast a cpu Tensor into a cuda Tensor + /// + /// auto c = torch::rand(10, torch::requires_grad()) + 2; + /// std::cout << c.is_leaf() << std::endl; // prints `false` + /// // c was created by the addition operation + /// + /// auto d = torch::rand(10).cuda(); + /// std::cout << d.is_leaf() << std::endl; // prints `true` + /// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine) + /// + /// auto e = torch::rand(10).cuda().requires_grad_(); + /// std::cout << e.is_leaf() << std::endl; // prints `true` + /// // e requires gradients and has no operations creating it + /// + /// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true)); + /// std::cout << f.is_leaf() << std::endl; // prints `true` + /// // f requires grad, has no operation creating it + /// @endcode + + /// \fn void backward(const Tensor & gradient={}, std::optional retain_graph=std::nullopt, bool create_graph=false, std::optional inputs=std::nullopt) const; + /// + /// Computes the gradient of current tensor with respect to graph leaves. + /// + /// The graph is differentiated using the chain rule. If the tensor is + /// non-scalar (i.e. its data has more than one element) and requires + /// gradient, the function additionally requires specifying ``gradient``. + /// It should be a tensor of matching type and location, that contains + /// the gradient of the differentiated function w.r.t. this Tensor. + /// + /// This function accumulates gradients in the leaves - you might need to + /// zero them before calling it. + /// + /// \param gradient Gradient w.r.t. the + /// tensor. If it is a tensor, it will be automatically converted + /// to a Tensor that does not require grad unless ``create_graph`` is True. + /// None values can be specified for scalar Tensors or ones that + /// don't require grad. If a None value would be acceptable then + /// this argument is optional. + /// \param retain_graph If ``false``, the graph used to compute + /// the grads will be freed. Note that in nearly all cases setting + /// this option to True is not needed and often can be worked around + /// in a much more efficient way. Defaults to the value of + /// ``create_graph``. + /// \param create_graph If ``true``, graph of the derivative will + /// be constructed, allowing to compute higher order derivative + /// products. Defaults to ``false``. + /// \param inputs Inputs w.r.t. which the gradient will be accumulated into + /// ``at::Tensor::grad``. All other Tensors will be ignored. If not + /// provided, the gradient is accumulated into all the leaf Tensors + /// that were used to compute the current tensor. + /// When inputs are provided and a given input is not a leaf, + /// the current implementation will call its grad_fn (even though it is not strictly needed to get this gradients). + /// It is an implementation detail on which the user should not rely. + /// See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details. + void backward(const Tensor & gradient={}, std::optional retain_graph=std::nullopt, bool create_graph=false, std::optional inputs=std::nullopt) const { + // NB: Adding this wrapper to _backward here because we'd like our + // 'backwards' api to accept the 'inputs' argument optionally. Since code gen + // currently does not support optional of TensorList our approach is to replace + // backward in native_functions.yaml with _backward and call it here instead. + if (inputs.has_value()) { + TORCH_CHECK(inputs.value().size() > 0, "'inputs' argument to backward cannot be empty") + this->_backward(inputs.value(), gradient, retain_graph, create_graph); + } else { + this->_backward({}, gradient, retain_graph, create_graph); + } + } + + /// \fn Tensor detach() const; + /// + /// Returns a new Tensor, detached from the current graph. + /// The result will never require gradient. + + /// \fn Tensor & detach_() const; + /// + /// Detaches the Tensor from the graph that created it, making it a leaf. + /// Views cannot be detached in-place. + + /// \fn void retain_grad() const; + /// + /// Enables this Tensor to have their :attr:`grad` populated during + /// :func:`backward`. This is a no-op for leaf tensors. + + /// \fn bool retains_grad() const; + /// + /// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be + /// populated during :func:`backward`, ``false`` otherwise. + + const Tensor& set_requires_grad(bool requires_grad) const { + TensorBase::set_requires_grad(requires_grad); + return *this; + } + + /// Return a mutable reference to the gradient. This is conventionally + /// used as `t.grad() = x` to set a gradient to a completely new tensor. + /// Note that this function work with a non-const Tensor and is not + /// thread safe. + Tensor& mutable_grad() const { + return impl_->mutable_grad(); + } + + /// This function returns an undefined tensor by default and returns a defined tensor + /// the first time a call to `backward()` computes gradients for this Tensor. + /// The attribute will then contain the gradients computed and future calls + /// to `backward()` will accumulate (add) gradients into it. + const Tensor& grad() const { + const Tensor& maybe_grad = impl_->grad(); + if (!is_leaf() && !retains_grad() && !maybe_grad.defined()) { + TORCH_WARN( + "The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad " + "attribute won't be populated during autograd.backward(). If you indeed want the .grad " + "field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. " + "If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor " + "instead. See github.com/pytorch/pytorch/pull/30531 for more information."); + } + return maybe_grad; + } + + // The Forward AD API functions below are low level and are not to be used by end + // users who should use the API provided in torch/csrc/autograd.h + + /// This function returns the forward gradient for this Tensor at the given level. + const Tensor& _fw_grad(uint64_t level) const { + return impl_->_fw_grad(level, *this); + } + + /// This function can be used to set the value of the forward grad. + /// Note that the given new_grad might not be used directly if it has different + /// metadata (size/stride/storage offset) compared to this Tensor. In that case, + /// new_grad content will be copied into a new Tensor + void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const { + impl_->_set_fw_grad(new_grad, *this, level, is_inplace_op); + } + + + // STOP. Thinking of adding a method here, which only makes use + // of other ATen methods? Define it in native_functions.yaml. + + //example + //Tensor * add(Tensor & b); + void __dispatch__backward(at::TensorList inputs, const ::std::optional & gradient={}, ::std::optional retain_graph=::std::nullopt, bool create_graph=false) const; + void __dispatch_set_data(const at::Tensor & new_data) const; + at::Tensor __dispatch_data() const; + bool __dispatch_is_leaf() const; + int64_t __dispatch_output_nr() const; + int64_t __dispatch__version() const; + at::Tensor & __dispatch_requires_grad_(bool requires_grad=true) const; + void __dispatch_retain_grad() const; + bool __dispatch_retains_grad() const; + at::Tensor _fw_primal(int64_t level) const; + at::Tensor & rename_(::std::optional names) const; + at::Tensor rename(::std::optional names) const; + at::Tensor align_to(at::DimnameList names) const; + at::Tensor align_to(at::DimnameList order, int64_t ellipsis_idx) const; + at::Tensor align_as(const at::Tensor & other) const; + at::Tensor refine_names(at::DimnameList names) const; + at::Tensor abs() const; + at::Tensor & abs_() const; + at::Tensor absolute() const; + at::Tensor & absolute_() const; + at::Tensor angle() const; + at::Tensor sgn() const; + at::Tensor & sgn_() const; + at::Tensor chalf(::std::optional memory_format=::std::nullopt) const; + at::Tensor _conj() const; + at::Tensor __dispatch_conj() const; + at::Tensor _conj_physical() const; + at::Tensor conj_physical() const; + at::Tensor & conj_physical_() const; + at::Tensor resolve_conj() const; + at::Tensor resolve_neg() const; + at::Tensor _neg_view() const; + at::Tensor acos() const; + at::Tensor & acos_() const; + at::Tensor arccos() const; + at::Tensor & arccos_() const; + at::Tensor add(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor & add_(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor add(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor & add_(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor addmv(const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & addmv_(const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor addr(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & addr_(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor _is_all_true() const; + at::Tensor _is_any_true() const; + at::Tensor all(int64_t dim, bool keepdim=false) const; + at::Tensor all(at::OptionalIntArrayRef dim, bool keepdim=false) const; + at::Tensor all(at::Dimname dim, bool keepdim=false) const; + bool allclose(const at::Tensor & other, double rtol=1e-05, double atol=1e-08, bool equal_nan=false) const; + at::Tensor any(int64_t dim, bool keepdim=false) const; + at::Tensor any(at::OptionalIntArrayRef dim, bool keepdim=false) const; + at::Tensor any(at::Dimname dim, bool keepdim=false) const; + at::Tensor argmax(::std::optional dim=::std::nullopt, bool keepdim=false) const; + at::Tensor argmin(::std::optional dim=::std::nullopt, bool keepdim=false) const; + at::Tensor acosh() const; + at::Tensor & acosh_() const; + at::Tensor arccosh() const; + at::Tensor & arccosh_() const; + at::Tensor asinh() const; + at::Tensor & asinh_() const; + at::Tensor arcsinh() const; + at::Tensor & arcsinh_() const; + at::Tensor atanh() const; + at::Tensor & atanh_() const; + at::Tensor arctanh() const; + at::Tensor & arctanh_() const; + at::Tensor as_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor as_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + const at::Tensor & as_strided_(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + const at::Tensor & as_strided__symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor asin() const; + at::Tensor & asin_() const; + at::Tensor arcsin() const; + at::Tensor & arcsin_() const; + at::Tensor atan() const; + at::Tensor & atan_() const; + at::Tensor arctan() const; + at::Tensor & arctan_() const; + at::Tensor baddbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & baddbmm_(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor bernoulli(::std::optional generator=::std::nullopt) const; + at::Tensor & bernoulli_(const at::Tensor & p, ::std::optional generator=::std::nullopt) const; + at::Tensor & bernoulli_(double p=0.5, ::std::optional generator=::std::nullopt) const; + at::Tensor bernoulli(double p, ::std::optional generator=::std::nullopt) const; + at::Tensor bincount(const ::std::optional & weights={}, int64_t minlength=0) const; + at::Tensor bincount_symint(const ::std::optional & weights={}, c10::SymInt minlength=0) const; + at::Tensor bitwise_not() const; + at::Tensor & bitwise_not_() const; + at::Tensor copysign(const at::Tensor & other) const; + at::Tensor & copysign_(const at::Tensor & other) const; + at::Tensor copysign(const at::Scalar & other) const; + at::Tensor & copysign_(const at::Scalar & other) const; + at::Tensor _lazy_clone() const; + at::Tensor logical_not() const; + at::Tensor & logical_not_() const; + at::Tensor logical_xor(const at::Tensor & other) const; + at::Tensor & logical_xor_(const at::Tensor & other) const; + at::Tensor logical_and(const at::Tensor & other) const; + at::Tensor & logical_and_(const at::Tensor & other) const; + at::Tensor logical_or(const at::Tensor & other) const; + at::Tensor & logical_or_(const at::Tensor & other) const; + at::Tensor bmm(const at::Tensor & mat2) const; + at::Tensor broadcast_to(at::IntArrayRef size) const; + at::Tensor broadcast_to_symint(c10::SymIntArrayRef size) const; + at::Tensor ceil() const; + at::Tensor & ceil_() const; + ::std::vector unsafe_chunk(int64_t chunks, int64_t dim=0) const; + ::std::vector chunk(int64_t chunks, int64_t dim=0) const; + ::std::vector tensor_split(int64_t sections, int64_t dim=0) const; + ::std::vector tensor_split_symint(c10::SymInt sections, int64_t dim=0) const; + ::std::vector tensor_split(at::IntArrayRef indices, int64_t dim=0) const; + ::std::vector tensor_split_symint(c10::SymIntArrayRef indices, int64_t dim=0) const; + ::std::vector tensor_split(const at::Tensor & tensor_indices_or_sections, int64_t dim=0) const; + at::Tensor clamp(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor clamp(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor & clamp_(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor & clamp_(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor clamp_max(const at::Scalar & max) const; + at::Tensor clamp_max(const at::Tensor & max) const; + at::Tensor & clamp_max_(const at::Scalar & max) const; + at::Tensor & clamp_max_(const at::Tensor & max) const; + at::Tensor clamp_min(const at::Scalar & min) const; + at::Tensor clamp_min(const at::Tensor & min) const; + at::Tensor & clamp_min_(const at::Scalar & min) const; + at::Tensor & clamp_min_(const at::Tensor & min) const; + at::Tensor clip(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor clip(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor & clip_(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor & clip_(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor __dispatch_contiguous(at::MemoryFormat memory_format=c10::MemoryFormat::Contiguous) const; + at::Tensor & copy_(const at::Tensor & src, bool non_blocking=false) const; + at::Tensor cos() const; + at::Tensor & cos_() const; + at::Tensor cosh() const; + at::Tensor & cosh_() const; + at::Tensor count_nonzero(at::IntArrayRef dim) const; + at::Tensor count_nonzero(::std::optional dim=::std::nullopt) const; + at::Tensor cov(int64_t correction=1, const ::std::optional & fweights={}, const ::std::optional & aweights={}) const; + at::Tensor corrcoef() const; + ::std::tuple cummax(int64_t dim) const; + ::std::tuple cummax(at::Dimname dim) const; + ::std::tuple cummin(int64_t dim) const; + ::std::tuple cummin(at::Dimname dim) const; + at::Tensor cumprod(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumprod_(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor cumprod(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumprod_(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor cumsum(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumsum_(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor cumsum(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumsum_(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor diag_embed(int64_t offset=0, int64_t dim1=-2, int64_t dim2=-1) const; + at::Tensor diagflat(int64_t offset=0) const; + at::Tensor diagonal(int64_t offset=0, int64_t dim1=0, int64_t dim2=1) const; + at::Tensor diagonal(at::Dimname outdim, at::Dimname dim1, at::Dimname dim2, int64_t offset=0) const; + at::Tensor & fill_diagonal_(const at::Scalar & fill_value, bool wrap=false) const; + at::Tensor diff(int64_t n=1, int64_t dim=-1, const ::std::optional & prepend={}, const ::std::optional & append={}) const; + at::Tensor div(const at::Tensor & other) const; + at::Tensor & div_(const at::Tensor & other) const; + at::Tensor div(const at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor & div_(const at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor div(const at::Scalar & other) const; + at::Tensor & div_(const at::Scalar & other) const; + at::Tensor div(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor & div_(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor divide(const at::Tensor & other) const; + at::Tensor & divide_(const at::Tensor & other) const; + at::Tensor divide(const at::Scalar & other) const; + at::Tensor & divide_(const at::Scalar & other) const; + at::Tensor divide(const at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor & divide_(const at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor divide(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor & divide_(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor true_divide(const at::Tensor & other) const; + at::Tensor & true_divide_(const at::Tensor & other) const; + at::Tensor true_divide(const at::Scalar & other) const; + at::Tensor & true_divide_(const at::Scalar & other) const; + at::Tensor dot(const at::Tensor & tensor) const; + at::Tensor vdot(const at::Tensor & other) const; + at::Tensor new_empty(at::IntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_empty(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_empty_symint(c10::SymIntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_empty_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options={}) const; + at::Tensor new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options={}) const; + at::Tensor new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_full(at::IntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options={}) const; + at::Tensor new_full(at::IntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options={}) const; + at::Tensor new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_zeros(at::IntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_zeros(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_zeros_symint(c10::SymIntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_zeros_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_ones(at::IntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_ones(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_ones_symint(c10::SymIntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_ones_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + const at::Tensor & resize_(at::IntArrayRef size, ::std::optional memory_format=::std::nullopt) const; + const at::Tensor & resize__symint(c10::SymIntArrayRef size, ::std::optional memory_format=::std::nullopt) const; + at::Tensor erf() const; + at::Tensor & erf_() const; + at::Tensor erfc() const; + at::Tensor & erfc_() const; + at::Tensor exp() const; + at::Tensor & exp_() const; + at::Tensor exp2() const; + at::Tensor & exp2_() const; + at::Tensor expm1() const; + at::Tensor & expm1_() const; + at::Tensor expand(at::IntArrayRef size, bool implicit=false) const; + at::Tensor expand_symint(c10::SymIntArrayRef size, bool implicit=false) const; + at::Tensor expand_as(const at::Tensor & other) const; + at::Tensor flatten(int64_t start_dim=0, int64_t end_dim=-1) const; + at::Tensor flatten(int64_t start_dim, int64_t end_dim, at::Dimname out_dim) const; + at::Tensor flatten(at::Dimname start_dim, at::Dimname end_dim, at::Dimname out_dim) const; + at::Tensor flatten(at::DimnameList dims, at::Dimname out_dim) const; + at::Tensor unflatten(int64_t dim, at::IntArrayRef sizes) const; + at::Tensor unflatten_symint(int64_t dim, c10::SymIntArrayRef sizes) const; + at::Tensor unflatten(at::Dimname dim, at::IntArrayRef sizes, at::DimnameList names) const; + at::Tensor unflatten_symint(at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names) const; + at::Tensor & fill_(const at::Scalar & value) const; + at::Tensor & fill_(const at::Tensor & value) const; + at::Tensor floor() const; + at::Tensor & floor_() const; + at::Tensor floor_divide(const at::Tensor & other) const; + at::Tensor & floor_divide_(const at::Tensor & other) const; + at::Tensor floor_divide(const at::Scalar & other) const; + at::Tensor & floor_divide_(const at::Scalar & other) const; + at::Tensor frac() const; + at::Tensor & frac_() const; + at::Tensor gcd(const at::Tensor & other) const; + at::Tensor & gcd_(const at::Tensor & other) const; + at::Tensor lcm(const at::Tensor & other) const; + at::Tensor & lcm_(const at::Tensor & other) const; + at::Tensor index(const c10::List<::std::optional> & indices) const; + at::Tensor & index_copy_(int64_t dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor index_copy(int64_t dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor & index_copy_(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor index_copy(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor & index_put_(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false) const; + at::Tensor index_put(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false) const; + at::Tensor isclose(const at::Tensor & other, double rtol=1e-05, double atol=1e-08, bool equal_nan=false) const; + at::Tensor isnan() const; + bool is_distributed() const; + bool __dispatch_is_floating_point() const; + bool __dispatch_is_complex() const; + bool __dispatch_is_conj() const; + bool __dispatch__is_zerotensor() const; + bool __dispatch_is_neg() const; + at::Tensor isreal() const; + bool is_nonzero() const; + bool is_same_size(const at::Tensor & other) const; + bool __dispatch_is_signed() const; + bool __dispatch_is_inference() const; + at::Tensor kron(const at::Tensor & other) const; + ::std::tuple kthvalue(int64_t k, int64_t dim=-1, bool keepdim=false) const; + ::std::tuple kthvalue_symint(c10::SymInt k, int64_t dim=-1, bool keepdim=false) const; + ::std::tuple kthvalue(int64_t k, at::Dimname dim, bool keepdim=false) const; + ::std::tuple kthvalue_symint(c10::SymInt k, at::Dimname dim, bool keepdim=false) const; + at::Tensor nan_to_num(::std::optional nan=::std::nullopt, ::std::optional posinf=::std::nullopt, ::std::optional neginf=::std::nullopt) const; + at::Tensor & nan_to_num_(::std::optional nan=::std::nullopt, ::std::optional posinf=::std::nullopt, ::std::optional neginf=::std::nullopt) const; + at::Tensor ldexp(const at::Tensor & other) const; + at::Tensor & ldexp_(const at::Tensor & other) const; + at::Tensor log() const; + at::Tensor & log_() const; + at::Tensor log10() const; + at::Tensor & log10_() const; + at::Tensor log1p() const; + at::Tensor & log1p_() const; + at::Tensor log2() const; + at::Tensor & log2_() const; + at::Tensor logaddexp(const at::Tensor & other) const; + at::Tensor logaddexp2(const at::Tensor & other) const; + at::Tensor xlogy(const at::Tensor & other) const; + at::Tensor xlogy(const at::Scalar & other) const; + at::Tensor & xlogy_(const at::Tensor & other) const; + at::Tensor & xlogy_(const at::Scalar & other) const; + at::Tensor log_softmax(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor log_softmax(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor logcumsumexp(int64_t dim) const; + at::Tensor logcumsumexp(at::Dimname dim) const; + at::Tensor logsumexp(at::IntArrayRef dim, bool keepdim=false) const; + at::Tensor logsumexp(at::DimnameList dim, bool keepdim=false) const; + at::Tensor matmul(const at::Tensor & other) const; + at::Tensor matrix_power(int64_t n) const; + at::Tensor matrix_exp() const; + ::std::tuple aminmax(::std::optional dim=::std::nullopt, bool keepdim=false) const; + ::std::tuple max(int64_t dim, bool keepdim=false) const; + ::std::tuple max(at::Dimname dim, bool keepdim=false) const; + at::Tensor amax(at::IntArrayRef dim={}, bool keepdim=false) const; + at::Tensor mean(::std::optional dtype=::std::nullopt) const; + at::Tensor mean(at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor mean(at::DimnameList dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor nanmean(at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor median() const; + ::std::tuple median(int64_t dim, bool keepdim=false) const; + ::std::tuple median(at::Dimname dim, bool keepdim=false) const; + at::Tensor nanmedian() const; + ::std::tuple nanmedian(int64_t dim, bool keepdim=false) const; + ::std::tuple nanmedian(at::Dimname dim, bool keepdim=false) const; + ::std::tuple min(int64_t dim, bool keepdim=false) const; + ::std::tuple min(at::Dimname dim, bool keepdim=false) const; + at::Tensor amin(at::IntArrayRef dim={}, bool keepdim=false) const; + at::Tensor mm(const at::Tensor & mat2) const; + ::std::tuple mode(int64_t dim=-1, bool keepdim=false) const; + ::std::tuple mode(at::Dimname dim, bool keepdim=false) const; + at::Tensor mul(const at::Tensor & other) const; + at::Tensor & mul_(const at::Tensor & other) const; + at::Tensor mul(const at::Scalar & other) const; + at::Tensor & mul_(const at::Scalar & other) const; + at::Tensor multiply(const at::Tensor & other) const; + at::Tensor & multiply_(const at::Tensor & other) const; + at::Tensor multiply(const at::Scalar & other) const; + at::Tensor & multiply_(const at::Scalar & other) const; + at::Tensor mv(const at::Tensor & vec) const; + at::Tensor mvlgamma(int64_t p) const; + at::Tensor & mvlgamma_(int64_t p) const; + at::Tensor narrow_copy(int64_t dim, int64_t start, int64_t length) const; + at::Tensor narrow_copy_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const; + at::Tensor narrow(int64_t dim, int64_t start, int64_t length) const; + at::Tensor narrow_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const; + at::Tensor narrow(int64_t dim, const at::Tensor & start, int64_t length) const; + at::Tensor narrow_symint(int64_t dim, const at::Tensor & start, c10::SymInt length) const; + at::Tensor permute(at::IntArrayRef dims) const; + at::Tensor movedim(at::IntArrayRef source, at::IntArrayRef destination) const; + at::Tensor movedim(int64_t source, int64_t destination) const; + at::Tensor moveaxis(at::IntArrayRef source, at::IntArrayRef destination) const; + at::Tensor moveaxis(int64_t source, int64_t destination) const; + at::Tensor numpy_T() const; + at::Tensor matrix_H() const; + at::Tensor mT() const; + at::Tensor mH() const; + at::Tensor adjoint() const; + bool is_pinned(::std::optional device=::std::nullopt) const; + at::Tensor pin_memory(::std::optional device=::std::nullopt) const; + at::Tensor pinverse(double rcond=1e-15) const; + at::Tensor rad2deg() const; + at::Tensor & rad2deg_() const; + at::Tensor deg2rad() const; + at::Tensor & deg2rad_() const; + at::Tensor ravel() const; + at::Tensor reciprocal() const; + at::Tensor & reciprocal_() const; + at::Tensor neg() const; + at::Tensor & neg_() const; + at::Tensor negative() const; + at::Tensor & negative_() const; + at::Tensor repeat(at::IntArrayRef repeats) const; + at::Tensor repeat_symint(c10::SymIntArrayRef repeats) const; + at::Tensor repeat_interleave(const at::Tensor & repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor repeat_interleave_symint(const at::Tensor & repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor repeat_interleave(int64_t repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor repeat_interleave_symint(c10::SymInt repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor reshape(at::IntArrayRef shape) const; + at::Tensor reshape_symint(c10::SymIntArrayRef shape) const; + at::Tensor _reshape_alias(at::IntArrayRef size, at::IntArrayRef stride) const; + at::Tensor _reshape_alias_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const; + at::Tensor reshape_as(const at::Tensor & other) const; + at::Tensor round() const; + at::Tensor & round_() const; + at::Tensor round(int64_t decimals) const; + at::Tensor & round_(int64_t decimals) const; + at::Tensor relu() const; + at::Tensor & relu_() const; + at::Tensor prelu(const at::Tensor & weight) const; + at::Tensor hardshrink(const at::Scalar & lambd=0.5) const; + at::Tensor hardshrink_backward(const at::Tensor & grad_out, const at::Scalar & lambd) const; + at::Tensor rsqrt() const; + at::Tensor & rsqrt_() const; + at::Tensor select(at::Dimname dim, int64_t index) const; + at::Tensor select(int64_t dim, int64_t index) const; + at::Tensor select_symint(int64_t dim, c10::SymInt index) const; + at::Tensor sigmoid() const; + at::Tensor & sigmoid_() const; + at::Tensor logit(::std::optional eps=::std::nullopt) const; + at::Tensor & logit_(::std::optional eps=::std::nullopt) const; + at::Tensor sin() const; + at::Tensor & sin_() const; + at::Tensor sinc() const; + at::Tensor & sinc_() const; + at::Tensor sinh() const; + at::Tensor & sinh_() const; + at::Tensor detach() const; + at::Tensor & detach_() const; + int64_t size(at::Dimname dim) const; + at::Tensor slice(int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, int64_t step=1) const; + at::Tensor slice_symint(int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1) const; + at::Tensor slice_inverse(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, int64_t step=1) const; + at::Tensor slice_inverse_symint(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1) const; + at::Tensor slice_scatter(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, int64_t step=1) const; + at::Tensor slice_scatter_symint(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1) const; + at::Tensor select_scatter(const at::Tensor & src, int64_t dim, int64_t index) const; + at::Tensor select_scatter_symint(const at::Tensor & src, int64_t dim, c10::SymInt index) const; + at::Tensor diagonal_scatter(const at::Tensor & src, int64_t offset=0, int64_t dim1=0, int64_t dim2=1) const; + at::Tensor as_strided_scatter(const at::Tensor & src, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor as_strided_scatter_symint(const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor smm(const at::Tensor & mat2) const; + at::Tensor softmax(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor softmax(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + ::std::vector unsafe_split(int64_t split_size, int64_t dim=0) const; + ::std::vector unsafe_split_symint(c10::SymInt split_size, int64_t dim=0) const; + ::std::vector split(int64_t split_size, int64_t dim=0) const; + ::std::vector split_symint(c10::SymInt split_size, int64_t dim=0) const; + ::std::vector split(at::IntArrayRef split_size, int64_t dim=0) const; + ::std::vector split_symint(c10::SymIntArrayRef split_size, int64_t dim=0) const; + ::std::vector unsafe_split_with_sizes(at::IntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector unsafe_split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector split_with_sizes(at::IntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector hsplit(int64_t sections) const; + ::std::vector hsplit(at::IntArrayRef indices) const; + ::std::vector vsplit(int64_t sections) const; + ::std::vector vsplit(at::IntArrayRef indices) const; + ::std::vector dsplit(int64_t sections) const; + ::std::vector dsplit(at::IntArrayRef indices) const; + at::Tensor squeeze() const; + at::Tensor squeeze(int64_t dim) const; + at::Tensor squeeze(at::Dimname dim) const; + at::Tensor squeeze(at::IntArrayRef dim) const; + at::Tensor & squeeze_() const; + at::Tensor & squeeze_(int64_t dim) const; + at::Tensor & squeeze_(at::IntArrayRef dim) const; + at::Tensor & squeeze_(at::Dimname dim) const; + at::Tensor sspaddmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor stft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) const; + at::Tensor stft(int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, c10::string_view pad_mode="reflect", bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) const; + at::Tensor istft(int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional length=::std::nullopt, bool return_complex=false) const; + int64_t stride(at::Dimname dim) const; + at::Tensor sum(::std::optional dtype=::std::nullopt) const; + at::Tensor sum(at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor sum(at::DimnameList dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor nansum(at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor hash_tensor(at::IntArrayRef dim={}, bool keepdim=false, int64_t mode=0) const; + at::Tensor sum_to_size(at::IntArrayRef size) const; + at::Tensor sum_to_size_symint(c10::SymIntArrayRef size) const; + at::Tensor sqrt() const; + at::Tensor & sqrt_() const; + at::Tensor square() const; + at::Tensor & square_() const; + at::Tensor std(bool unbiased) const; + at::Tensor std(at::OptionalIntArrayRef dim, bool unbiased, bool keepdim=false) const; + at::Tensor std(at::OptionalIntArrayRef dim=::std::nullopt, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor std(at::DimnameList dim, bool unbiased, bool keepdim=false) const; + at::Tensor std(at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor prod(::std::optional dtype=::std::nullopt) const; + at::Tensor prod(int64_t dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor prod(at::Dimname dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor t() const; + at::Tensor & t_() const; + at::Tensor tan() const; + at::Tensor & tan_() const; + at::Tensor tanh() const; + at::Tensor & tanh_() const; + at::Tensor tile(at::IntArrayRef dims) const; + at::Tensor tile_symint(c10::SymIntArrayRef dims) const; + at::Tensor transpose(int64_t dim0, int64_t dim1) const; + at::Tensor transpose(at::Dimname dim0, at::Dimname dim1) const; + at::Tensor & transpose_(int64_t dim0, int64_t dim1) const; + at::Tensor flip(at::IntArrayRef dims) const; + at::Tensor fliplr() const; + at::Tensor flipud() const; + at::Tensor roll(at::IntArrayRef shifts, at::IntArrayRef dims={}) const; + at::Tensor roll_symint(c10::SymIntArrayRef shifts, at::IntArrayRef dims={}) const; + at::Tensor rot90(int64_t k=1, at::IntArrayRef dims={0,1}) const; + at::Tensor _nested_tensor_size() const; + at::Tensor _nested_tensor_strides() const; + at::Tensor _nested_tensor_storage_offsets() const; + at::Tensor trunc() const; + at::Tensor & trunc_() const; + at::Tensor fix() const; + at::Tensor & fix_() const; + at::Tensor type_as(const at::Tensor & other) const; + at::Tensor unsqueeze(int64_t dim) const; + at::Tensor & unsqueeze_(int64_t dim) const; + at::Tensor var(bool unbiased) const; + at::Tensor var(at::OptionalIntArrayRef dim, bool unbiased, bool keepdim=false) const; + at::Tensor var(at::OptionalIntArrayRef dim=::std::nullopt, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor var(at::DimnameList dim, bool unbiased, bool keepdim=false) const; + at::Tensor var(at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor view_as(const at::Tensor & other) const; + at::Tensor where(const at::Tensor & condition, const at::Tensor & other) const; + at::Tensor where(const at::Tensor & condition, const at::Scalar & other) const; + at::Tensor norm(const ::std::optional & p, at::ScalarType dtype) const; + at::Tensor norm(const at::Scalar & p=2) const; + at::Tensor norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim, at::ScalarType dtype) const; + at::Tensor norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim=false) const; + at::Tensor norm(const ::std::optional & p, at::DimnameList dim, bool keepdim, at::ScalarType dtype) const; + at::Tensor norm(const ::std::optional & p, at::DimnameList dim, bool keepdim=false) const; + ::std::tuple frexp() const; + at::Tensor clone(::std::optional memory_format=::std::nullopt) const; + at::Tensor positive() const; + const at::Tensor & resize_as_(const at::Tensor & the_template, ::std::optional memory_format=::std::nullopt) const; + const at::Tensor & resize_as_sparse_(const at::Tensor & the_template) const; + at::Tensor & zero_() const; + at::Tensor sub(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor & sub_(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor sub(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor & sub_(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor subtract(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor & subtract_(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor subtract(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor & subtract_(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor heaviside(const at::Tensor & values) const; + at::Tensor & heaviside_(const at::Tensor & values) const; + at::Tensor addmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & addmm_(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor _addmm_activation(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1, bool use_gelu=false) const; + const at::Tensor & sparse_resize_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const; + const at::Tensor & sparse_resize_and_clear_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const; + at::Tensor sparse_mask(const at::Tensor & mask) const; + at::Tensor _sparse_mask_projection(const at::Tensor & mask, bool accumulate_matches=false) const; + at::Tensor to_dense(::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt) const; + at::Tensor _to_dense(::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt) const; + int64_t sparse_dim() const; + int64_t _dimI() const; + int64_t dense_dim() const; + int64_t _dimV() const; + int64_t _nnz() const; + at::Tensor coalesce() const; + bool is_coalesced() const; + at::Tensor _indices() const; + at::Tensor _values() const; + at::Tensor & _coalesced_(bool coalesced) const; + at::Tensor indices() const; + at::Tensor values() const; + at::Tensor crow_indices() const; + at::Tensor col_indices() const; + at::Tensor ccol_indices() const; + at::Tensor row_indices() const; + ::std::vector unbind(int64_t dim=0) const; + ::std::vector unbind(at::Dimname dim) const; + at::Tensor to_sparse(int64_t sparse_dim) const; + at::Tensor _to_sparse(int64_t sparse_dim) const; + at::Tensor to_sparse(::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse(::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_csr(::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_csr(::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_csc(::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_csc(::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_mkldnn(::std::optional dtype=::std::nullopt) const; + at::Tensor dequantize() const; + double q_scale() const; + int64_t q_zero_point() const; + at::Tensor q_per_channel_scales() const; + at::Tensor q_per_channel_zero_points() const; + int64_t q_per_channel_axis() const; + at::Tensor int_repr() const; + at::QScheme qscheme() const; + at::Tensor _autocast_to_reduced_precision(bool cuda_enabled, bool cpu_enabled, at::ScalarType cuda_dtype, at::ScalarType cpu_dtype) const; + at::Tensor _autocast_to_full_precision(bool cuda_enabled, bool cpu_enabled) const; + at::Tensor to(at::TensorOptions options={}, bool non_blocking=false, bool copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Tensor to(::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, bool copy, ::std::optional memory_format) const; + at::Tensor to(at::Device device, at::ScalarType dtype, bool non_blocking=false, bool copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Tensor to(at::ScalarType dtype, bool non_blocking=false, bool copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Tensor to(const at::Tensor & other, bool non_blocking=false, bool copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Scalar item() const; + at::Tensor & set_(at::Storage source) const; + at::Tensor & set_(at::Storage source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride={}) const; + at::Tensor & set__symint(at::Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride={}) const; + at::Tensor & set_(const at::Tensor & source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride={}) const; + at::Tensor & set__symint(const at::Tensor & source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride={}) const; + at::Tensor & set_(const at::Tensor & source) const; + at::Tensor & set_() const; + bool is_set_to(const at::Tensor & tensor) const; + at::Tensor & masked_fill_(const at::Tensor & mask, const at::Scalar & value) const; + at::Tensor masked_fill(const at::Tensor & mask, const at::Scalar & value) const; + at::Tensor & masked_fill_(const at::Tensor & mask, const at::Tensor & value) const; + at::Tensor masked_fill(const at::Tensor & mask, const at::Tensor & value) const; + at::Tensor & masked_scatter_(const at::Tensor & mask, const at::Tensor & source) const; + at::Tensor masked_scatter(const at::Tensor & mask, const at::Tensor & source) const; + at::Tensor view(at::IntArrayRef size) const; + at::Tensor view_symint(c10::SymIntArrayRef size) const; + at::Tensor view(at::ScalarType dtype) const; + at::Tensor & put_(const at::Tensor & index, const at::Tensor & source, bool accumulate=false) const; + at::Tensor put(const at::Tensor & index, const at::Tensor & source, bool accumulate=false) const; + at::Tensor & index_add_(int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha=1) const; + at::Tensor index_add(int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha=1) const; + at::Tensor index_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha=1) const; + at::Tensor & index_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true) const; + at::Tensor index_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true) const; + at::Tensor & index_fill_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor index_fill(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor & index_fill_(int64_t dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor index_fill(int64_t dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor & index_fill_(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor & index_fill_(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor index_fill(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor index_fill(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const; + at::Tensor scatter(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor scatter_add(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor & scatter_add_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self=true) const; + at::Tensor & scatter_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self=true) const; + at::Tensor & eq_(const at::Scalar & other) const; + at::Tensor & eq_(const at::Tensor & other) const; + at::Tensor bitwise_and(const at::Scalar & other) const; + at::Tensor bitwise_and(const at::Tensor & other) const; + at::Tensor & bitwise_and_(const at::Scalar & other) const; + at::Tensor & bitwise_and_(const at::Tensor & other) const; + at::Tensor __and__(const at::Scalar & other) const; + at::Tensor __and__(const at::Tensor & other) const; + at::Tensor & __iand__(const at::Scalar & other) const; + at::Tensor & __iand__(const at::Tensor & other) const; + at::Tensor bitwise_or(const at::Scalar & other) const; + at::Tensor bitwise_or(const at::Tensor & other) const; + at::Tensor & bitwise_or_(const at::Scalar & other) const; + at::Tensor & bitwise_or_(const at::Tensor & other) const; + at::Tensor __or__(const at::Scalar & other) const; + at::Tensor __or__(const at::Tensor & other) const; + at::Tensor & __ior__(const at::Scalar & other) const; + at::Tensor & __ior__(const at::Tensor & other) const; + at::Tensor bitwise_xor(const at::Scalar & other) const; + at::Tensor bitwise_xor(const at::Tensor & other) const; + at::Tensor & bitwise_xor_(const at::Scalar & other) const; + at::Tensor & bitwise_xor_(const at::Tensor & other) const; + at::Tensor __xor__(const at::Scalar & other) const; + at::Tensor __xor__(const at::Tensor & other) const; + at::Tensor & __ixor__(const at::Scalar & other) const; + at::Tensor & __ixor__(const at::Tensor & other) const; + at::Tensor __lshift__(const at::Scalar & other) const; + at::Tensor __lshift__(const at::Tensor & other) const; + at::Tensor & __ilshift__(const at::Scalar & other) const; + at::Tensor & __ilshift__(const at::Tensor & other) const; + at::Tensor bitwise_left_shift(const at::Tensor & other) const; + at::Tensor & bitwise_left_shift_(const at::Tensor & other) const; + at::Tensor bitwise_left_shift(const at::Scalar & other) const; + at::Tensor & bitwise_left_shift_(const at::Scalar & other) const; + at::Tensor __rshift__(const at::Scalar & other) const; + at::Tensor __rshift__(const at::Tensor & other) const; + at::Tensor & __irshift__(const at::Scalar & other) const; + at::Tensor & __irshift__(const at::Tensor & other) const; + at::Tensor bitwise_right_shift(const at::Tensor & other) const; + at::Tensor & bitwise_right_shift_(const at::Tensor & other) const; + at::Tensor bitwise_right_shift(const at::Scalar & other) const; + at::Tensor & bitwise_right_shift_(const at::Scalar & other) const; + at::Tensor & tril_(int64_t diagonal=0) const; + at::Tensor & tril__symint(c10::SymInt diagonal=0) const; + at::Tensor & triu_(int64_t diagonal=0) const; + at::Tensor & triu__symint(c10::SymInt diagonal=0) const; + at::Tensor & digamma_() const; + at::Tensor & lerp_(const at::Tensor & end, const at::Scalar & weight) const; + at::Tensor & lerp_(const at::Tensor & end, const at::Tensor & weight) const; + at::Tensor & addbmm_(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor addbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & random_(int64_t from, ::std::optional to, ::std::optional generator=::std::nullopt) const; + at::Tensor & random_(int64_t to, ::std::optional generator=::std::nullopt) const; + at::Tensor & random_(::std::optional generator=::std::nullopt) const; + at::Tensor & uniform_(double from=0, double to=1, ::std::optional generator=::std::nullopt) const; + at::Tensor & cauchy_(double median=0, double sigma=1, ::std::optional generator=::std::nullopt) const; + at::Tensor & log_normal_(double mean=1, double std=2, ::std::optional generator=::std::nullopt) const; + at::Tensor & exponential_(double lambd=1, ::std::optional generator=::std::nullopt) const; + at::Tensor & geometric_(double p, ::std::optional generator=::std::nullopt) const; + at::Tensor diag(int64_t diagonal=0) const; + at::Tensor cross(const at::Tensor & other, ::std::optional dim=::std::nullopt) const; + at::Tensor triu(int64_t diagonal=0) const; + at::Tensor triu_symint(c10::SymInt diagonal=0) const; + at::Tensor tril(int64_t diagonal=0) const; + at::Tensor tril_symint(c10::SymInt diagonal=0) const; + at::Tensor trace() const; + at::Tensor ne(const at::Scalar & other) const; + at::Tensor ne(const at::Tensor & other) const; + at::Tensor & ne_(const at::Scalar & other) const; + at::Tensor & ne_(const at::Tensor & other) const; + at::Tensor not_equal(const at::Scalar & other) const; + at::Tensor not_equal(const at::Tensor & other) const; + at::Tensor & not_equal_(const at::Scalar & other) const; + at::Tensor & not_equal_(const at::Tensor & other) const; + at::Tensor eq(const at::Scalar & other) const; + at::Tensor eq(const at::Tensor & other) const; + at::Tensor ge(const at::Scalar & other) const; + at::Tensor ge(const at::Tensor & other) const; + at::Tensor & ge_(const at::Scalar & other) const; + at::Tensor & ge_(const at::Tensor & other) const; + at::Tensor greater_equal(const at::Scalar & other) const; + at::Tensor greater_equal(const at::Tensor & other) const; + at::Tensor & greater_equal_(const at::Scalar & other) const; + at::Tensor & greater_equal_(const at::Tensor & other) const; + at::Tensor le(const at::Scalar & other) const; + at::Tensor le(const at::Tensor & other) const; + at::Tensor & le_(const at::Scalar & other) const; + at::Tensor & le_(const at::Tensor & other) const; + at::Tensor less_equal(const at::Scalar & other) const; + at::Tensor less_equal(const at::Tensor & other) const; + at::Tensor & less_equal_(const at::Scalar & other) const; + at::Tensor & less_equal_(const at::Tensor & other) const; + at::Tensor gt(const at::Scalar & other) const; + at::Tensor gt(const at::Tensor & other) const; + at::Tensor & gt_(const at::Scalar & other) const; + at::Tensor & gt_(const at::Tensor & other) const; + at::Tensor greater(const at::Scalar & other) const; + at::Tensor greater(const at::Tensor & other) const; + at::Tensor & greater_(const at::Scalar & other) const; + at::Tensor & greater_(const at::Tensor & other) const; + at::Tensor lt(const at::Scalar & other) const; + at::Tensor lt(const at::Tensor & other) const; + at::Tensor & lt_(const at::Scalar & other) const; + at::Tensor & lt_(const at::Tensor & other) const; + at::Tensor less(const at::Scalar & other) const; + at::Tensor less(const at::Tensor & other) const; + at::Tensor & less_(const at::Scalar & other) const; + at::Tensor & less_(const at::Tensor & other) const; + at::Tensor take(const at::Tensor & index) const; + at::Tensor take_along_dim(const at::Tensor & indices, ::std::optional dim=::std::nullopt) const; + at::Tensor index_select(int64_t dim, const at::Tensor & index) const; + at::Tensor index_select(at::Dimname dim, const at::Tensor & index) const; + at::Tensor masked_select(const at::Tensor & mask) const; + at::Tensor nonzero() const; + at::Tensor nonzero_static(int64_t size, int64_t fill_value=-1) const; + at::Tensor nonzero_static_symint(c10::SymInt size, int64_t fill_value=-1) const; + ::std::vector nonzero_numpy() const; + at::Tensor argwhere() const; + at::Tensor gather(int64_t dim, const at::Tensor & index, bool sparse_grad=false) const; + at::Tensor gather(at::Dimname dim, const at::Tensor & index, bool sparse_grad=false) const; + at::Tensor addcmul(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + at::Tensor & addcmul_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + at::Tensor addcdiv(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + at::Tensor & addcdiv_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + ::std::tuple triangular_solve(const at::Tensor & A, bool upper=true, bool transpose=false, bool unitriangular=false) const; + ::std::tuple svd(bool some=true, bool compute_uv=true) const; + at::Tensor swapaxes(int64_t axis0, int64_t axis1) const; + at::Tensor & swapaxes_(int64_t axis0, int64_t axis1) const; + at::Tensor swapdims(int64_t dim0, int64_t dim1) const; + at::Tensor & swapdims_(int64_t dim0, int64_t dim1) const; + at::Tensor cholesky(bool upper=false) const; + at::Tensor cholesky_solve(const at::Tensor & input2, bool upper=false) const; + at::Tensor cholesky_inverse(bool upper=false) const; + ::std::tuple qr(bool some=true) const; + ::std::tuple geqrf() const; + at::Tensor orgqr(const at::Tensor & input2) const; + at::Tensor ormqr(const at::Tensor & input2, const at::Tensor & input3, bool left=true, bool transpose=false) const; + at::Tensor lu_solve(const at::Tensor & LU_data, const at::Tensor & LU_pivots) const; + at::Tensor multinomial(int64_t num_samples, bool replacement=false, ::std::optional generator=::std::nullopt) const; + at::Tensor multinomial_symint(c10::SymInt num_samples, bool replacement=false, ::std::optional generator=::std::nullopt) const; + at::Tensor & lgamma_() const; + at::Tensor lgamma() const; + at::Tensor digamma() const; + at::Tensor polygamma(int64_t n) const; + at::Tensor & polygamma_(int64_t n) const; + at::Tensor erfinv() const; + at::Tensor & erfinv_() const; + at::Tensor i0() const; + at::Tensor & i0_() const; + at::Tensor sign() const; + at::Tensor & sign_() const; + at::Tensor signbit() const; + at::Tensor dist(const at::Tensor & other, const at::Scalar & p=2) const; + at::Tensor & atan2_(const at::Tensor & other) const; + at::Tensor atan2(const at::Tensor & other) const; + at::Tensor arctan2(const at::Tensor & other) const; + at::Tensor & arctan2_(const at::Tensor & other) const; + at::Tensor lerp(const at::Tensor & end, const at::Scalar & weight) const; + at::Tensor lerp(const at::Tensor & end, const at::Tensor & weight) const; + at::Tensor histc(int64_t bins=100, const at::Scalar & min=0, const at::Scalar & max=0) const; + ::std::tuple histogram(const at::Tensor & bins, const ::std::optional & weight={}, bool density=false) const; + ::std::tuple histogram(int64_t bins=100, ::std::optional> range=::std::nullopt, const ::std::optional & weight={}, bool density=false) const; + at::Tensor fmod(const at::Scalar & other) const; + at::Tensor & fmod_(const at::Scalar & other) const; + at::Tensor fmod(const at::Tensor & other) const; + at::Tensor & fmod_(const at::Tensor & other) const; + at::Tensor hypot(const at::Tensor & other) const; + at::Tensor & hypot_(const at::Tensor & other) const; + at::Tensor igamma(const at::Tensor & other) const; + at::Tensor & igamma_(const at::Tensor & other) const; + at::Tensor igammac(const at::Tensor & other) const; + at::Tensor & igammac_(const at::Tensor & other) const; + at::Tensor nextafter(const at::Tensor & other) const; + at::Tensor & nextafter_(const at::Tensor & other) const; + at::Tensor remainder(const at::Scalar & other) const; + at::Tensor & remainder_(const at::Scalar & other) const; + at::Tensor remainder(const at::Tensor & other) const; + at::Tensor & remainder_(const at::Tensor & other) const; + at::Tensor min() const; + at::Tensor fmin(const at::Tensor & other) const; + at::Tensor max() const; + at::Tensor fmax(const at::Tensor & other) const; + at::Tensor maximum(const at::Tensor & other) const; + at::Tensor max(const at::Tensor & other) const; + at::Tensor minimum(const at::Tensor & other) const; + at::Tensor min(const at::Tensor & other) const; + at::Tensor quantile(const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + at::Tensor quantile(double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + at::Tensor nanquantile(const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + at::Tensor nanquantile(double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + ::std::tuple sort(int64_t dim=-1, bool descending=false) const; + ::std::tuple sort(::std::optional stable, int64_t dim=-1, bool descending=false) const; + ::std::tuple sort(at::Dimname dim, bool descending=false) const; + ::std::tuple sort(::std::optional stable, at::Dimname dim, bool descending=false) const; + at::Tensor msort() const; + at::Tensor argsort(int64_t dim=-1, bool descending=false) const; + at::Tensor argsort(bool stable, int64_t dim=-1, bool descending=false) const; + at::Tensor argsort(at::Dimname dim, bool descending=false) const; + ::std::tuple topk(int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true) const; + ::std::tuple topk_symint(c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true) const; + at::Tensor all() const; + at::Tensor any() const; + at::Tensor renorm(const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm) const; + at::Tensor & renorm_(const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm) const; + at::Tensor unfold(int64_t dimension, int64_t size, int64_t step) const; + bool equal(const at::Tensor & other) const; + at::Tensor pow(const at::Tensor & exponent) const; + at::Tensor pow(const at::Scalar & exponent) const; + at::Tensor & pow_(const at::Scalar & exponent) const; + at::Tensor & pow_(const at::Tensor & exponent) const; + at::Tensor float_power(const at::Tensor & exponent) const; + at::Tensor float_power(const at::Scalar & exponent) const; + at::Tensor & float_power_(const at::Scalar & exponent) const; + at::Tensor & float_power_(const at::Tensor & exponent) const; + at::Tensor & normal_(double mean=0, double std=1, ::std::optional generator=::std::nullopt) const; + at::Tensor alias() const; + at::Tensor isfinite() const; + at::Tensor isinf() const; + void record_stream(at::Stream s) const; + at::Tensor isposinf() const; + at::Tensor isneginf() const; + at::Tensor det() const; + ::std::tuple slogdet() const; + at::Tensor logdet() const; + at::Tensor inverse() const; + at::Tensor inner(const at::Tensor & other) const; + at::Tensor outer(const at::Tensor & vec2) const; + at::Tensor ger(const at::Tensor & vec2) const; + at::Tensor to_padded_tensor(double padding, at::OptionalIntArrayRef output_size=::std::nullopt) const; + at::Tensor to_padded_tensor_symint(double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) const; + + // Special C++ only overloads for std()-like functions (See gh-40287) + // These are needed because int -> bool conversion takes precedence over int -> IntArrayRef + // So, for example std(0) would select the std(unbiased=False) overload + + Tensor var(int dim) const { + return var(IntArrayRef{dim}); + } + + Tensor std(int dim) const { + return std(IntArrayRef{dim}); + } + + // We changed .dtype() to return a TypeMeta in #12766. Ideally, we want the + // at::kDouble and its friends to be TypeMeta's, but that hasn't happened yet. + // Before that change, we make this method to maintain BC for C++ usage like + // `x.to(y.dtype)`. + // TODO: remove following two after at::kDouble and its friends are TypeMeta's. + inline Tensor to(caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const { + return this->to(/*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy); + } + inline Tensor to(Device device, caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const { + return this->to(device, /*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy); + } + + template + decltype(auto) m(F func, Args&&... params) const { + return func(*this, std::forward(params)...); + } + + /// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended + /// to be used from functions that need to access the `Variable`'s equivalent `Tensor` + /// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`). + /// + /// One notable difference with the legacy `.data()` function is that changes to the + /// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset) + /// will not update the original `Variable`, due to the fact that this function + /// shallow-copies the `Variable`'s underlying TensorImpl. + at::Tensor tensor_data() const { + return TensorBase::tensor_data(); + } + + /// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data` + /// in Python, which create a new `Variable` that shares the same storage and + /// tensor metadata with the original `Variable`, but with a completely new + /// autograd history. + /// + /// NOTE: If we change the tensor metadata (e.g. sizes / strides / + /// storage / storage_offset) of a variable created from `var.variable_data()`, those + /// changes will not update the original variable `var`. In `.variable_data()`, we set + /// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal, + /// in order to prevent users from changing metadata of `var.variable_data()` + /// and expecting the original variable `var` to also be updated. + at::Tensor variable_data() const { + return TensorBase::variable_data(); + } + + // Hooks + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + template + using hook_return_void_t = std::enable_if_t>::value, unsigned>; + template + using hook_return_var_t = std::enable_if_t, Tensor>, unsigned>; + + /// Registers a backward hook. + /// + /// The hook will be called every time a gradient with respect to the Tensor is computed. + /// The hook should have one of the following signature: + /// ``` + /// hook(Tensor grad) -> Tensor + /// ``` + /// ``` + /// hook(Tensor grad) -> void + /// ``` + /// The hook should not modify its argument, but it can optionally return a new gradient + /// which will be used in place of `grad`. + /// + /// This function returns the index of the hook in the list which can be used to remove hook. + /// + /// Example: + /// @code + /// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad()); + /// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient + /// v.backward(torch::tensor({1., 2., 3.})); + /// // This prints: + /// // ``` + /// // 2 + /// // 4 + /// // 6 + /// // [ CPUFloatType{3} ] + /// // ``` + /// std::cout << v.grad() << std::endl; + /// v.remove_hook(h); // removes the hook + /// @endcode + template + hook_return_void_t register_hook(T&& hook) const; + template + hook_return_var_t register_hook(T&& hook) const; + + // Variable methods + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Tensor data() const { + return TensorBase::data(); + } + + void _backward(TensorList inputs, const std::optional& gradient, std::optional keep_graph, bool create_graph) const; + + const Tensor& requires_grad_(bool _requires_grad=true) const { + TensorBase::requires_grad_(_requires_grad); + return *this; + } +}; + +namespace detail { +// Helper creator for Tensor class which doesn't requires the users to pass +// in an intrusive_ptr instead it just converts the argument passed to +// requested intrusive_ptr type. +template +Tensor make_tensor(Args&&... args) { + return Tensor(c10::make_intrusive(std::forward(args)...)); +} + +} // namespace detail + +} // namespace at + + +namespace at { + +// aten::_backward(Tensor self, Tensor[] inputs, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> () +inline void Tensor::__dispatch__backward(at::TensorList inputs, const ::std::optional & gradient, ::std::optional retain_graph, bool create_graph) const { + return at::_ops::_backward::call(const_cast(*this), inputs, gradient, retain_graph, create_graph); +} + +// aten::set_data(Tensor(a!) self, Tensor new_data) -> () +inline void Tensor::__dispatch_set_data(const at::Tensor & new_data) const { + return at::_ops::set_data::call(const_cast(*this), new_data); +} + +// aten::data(Tensor self) -> Tensor +inline at::Tensor Tensor::__dispatch_data() const { + return at::_ops::data::call(const_cast(*this)); +} + +// aten::is_leaf(Tensor self) -> bool +inline bool Tensor::__dispatch_is_leaf() const { + return at::_ops::is_leaf::call(const_cast(*this)); +} + +// aten::output_nr(Tensor self) -> int +inline int64_t Tensor::__dispatch_output_nr() const { + return at::_ops::output_nr::call(const_cast(*this)); +} + +// aten::_version(Tensor self) -> int +inline int64_t Tensor::__dispatch__version() const { + return at::_ops::_version::call(const_cast(*this)); +} + +// aten::requires_grad_(Tensor(a!) self, bool requires_grad=True) -> Tensor(a!) +inline at::Tensor & Tensor::__dispatch_requires_grad_(bool requires_grad) const { + return at::_ops::requires_grad_::call(const_cast(*this), requires_grad); +} + +// aten::retain_grad(Tensor(a!) self) -> () +inline void Tensor::__dispatch_retain_grad() const { + return at::_ops::retain_grad::call(const_cast(*this)); +} + +// aten::retains_grad(Tensor self) -> bool +inline bool Tensor::__dispatch_retains_grad() const { + return at::_ops::retains_grad::call(const_cast(*this)); +} + +// aten::_fw_primal(Tensor(a) self, int level) -> Tensor(a) +inline at::Tensor Tensor::_fw_primal(int64_t level) const { + return at::_ops::_fw_primal::call(const_cast(*this), level); +} + +// aten::rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!) +inline at::Tensor & Tensor::rename_(::std::optional names) const { + return at::_ops::rename_::call(const_cast(*this), names); +} + +// aten::rename(Tensor(a) self, Dimname[]? names) -> Tensor(a) +inline at::Tensor Tensor::rename(::std::optional names) const { + return at::_ops::rename::call(const_cast(*this), names); +} + +// aten::align_to(Tensor(a) self, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::align_to(at::DimnameList names) const { + return at::_ops::align_to::call(const_cast(*this), names); +} + +// aten::align_to.ellipsis_idx(Tensor(a) self, Dimname[] order, int ellipsis_idx) -> Tensor(a) +inline at::Tensor Tensor::align_to(at::DimnameList order, int64_t ellipsis_idx) const { + return at::_ops::align_to_ellipsis_idx::call(const_cast(*this), order, ellipsis_idx); +} + +// aten::align_as(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::align_as(const at::Tensor & other) const { + return at::_ops::align_as::call(const_cast(*this), other); +} + +// aten::refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::refine_names(at::DimnameList names) const { + return at::_ops::refine_names::call(const_cast(*this), names); +} + +// aten::abs(Tensor self) -> Tensor +inline at::Tensor Tensor::abs() const { + return at::_ops::abs::call(const_cast(*this)); +} + +// aten::abs_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::abs_() const { + return at::_ops::abs_::call(const_cast(*this)); +} + +// aten::absolute(Tensor self) -> Tensor +inline at::Tensor Tensor::absolute() const { + return at::_ops::absolute::call(const_cast(*this)); +} + +// aten::absolute_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::absolute_() const { + return at::_ops::absolute_::call(const_cast(*this)); +} + +// aten::angle(Tensor self) -> Tensor +inline at::Tensor Tensor::angle() const { + return at::_ops::angle::call(const_cast(*this)); +} + +// aten::sgn(Tensor self) -> Tensor +inline at::Tensor Tensor::sgn() const { + return at::_ops::sgn::call(const_cast(*this)); +} + +// aten::sgn_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sgn_() const { + return at::_ops::sgn_::call(const_cast(*this)); +} + +// aten::chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor +inline at::Tensor Tensor::chalf(::std::optional memory_format) const { + return at::_ops::chalf::call(const_cast(*this), memory_format); +} + +// aten::_conj(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_conj() const { + return at::_ops::_conj::call(const_cast(*this)); +} + +// aten::conj(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::__dispatch_conj() const { + return at::_ops::conj::call(const_cast(*this)); +} + +// aten::_conj_physical(Tensor self) -> Tensor +inline at::Tensor Tensor::_conj_physical() const { + return at::_ops::_conj_physical::call(const_cast(*this)); +} + +// aten::conj_physical(Tensor self) -> Tensor +inline at::Tensor Tensor::conj_physical() const { + return at::_ops::conj_physical::call(const_cast(*this)); +} + +// aten::conj_physical_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::conj_physical_() const { + return at::_ops::conj_physical_::call(const_cast(*this)); +} + +// aten::resolve_conj(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::resolve_conj() const { + return at::_ops::resolve_conj::call(const_cast(*this)); +} + +// aten::resolve_neg(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::resolve_neg() const { + return at::_ops::resolve_neg::call(const_cast(*this)); +} + +// aten::_neg_view(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_neg_view() const { + return at::_ops::_neg_view::call(const_cast(*this)); +} + +// aten::acos(Tensor self) -> Tensor +inline at::Tensor Tensor::acos() const { + return at::_ops::acos::call(const_cast(*this)); +} + +// aten::acos_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::acos_() const { + return at::_ops::acos_::call(const_cast(*this)); +} + +// aten::arccos(Tensor self) -> Tensor +inline at::Tensor Tensor::arccos() const { + return at::_ops::arccos::call(const_cast(*this)); +} + +// aten::arccos_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arccos_() const { + return at::_ops::arccos_::call(const_cast(*this)); +} + +// aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::add(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::add_Tensor::call(const_cast(*this), other, alpha); +} + +// aten::add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::add_(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::add__Tensor::call(const_cast(*this), other, alpha); +} + +// aten::add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::add(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::add_Scalar::call(const_cast(*this), other, alpha); +} + +// aten::add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::add_(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::add__Scalar::call(const_cast(*this), other, alpha); +} + +// aten::addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addmv(const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmv::call(const_cast(*this), mat, vec, beta, alpha); +} + +// aten::addmv_(Tensor(a!) self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::addmv_(const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmv_::call(const_cast(*this), mat, vec, beta, alpha); +} + +// aten::addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addr(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addr::call(const_cast(*this), vec1, vec2, beta, alpha); +} + +// aten::addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::addr_(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addr_::call(const_cast(*this), vec1, vec2, beta, alpha); +} + +// aten::_is_all_true(Tensor self) -> Tensor +inline at::Tensor Tensor::_is_all_true() const { + return at::_ops::_is_all_true::call(const_cast(*this)); +} + +// aten::_is_any_true(Tensor self) -> Tensor +inline at::Tensor Tensor::_is_any_true() const { + return at::_ops::_is_any_true::call(const_cast(*this)); +} + +// aten::all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::all(int64_t dim, bool keepdim) const { + return at::_ops::all_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::all(at::OptionalIntArrayRef dim, bool keepdim) const { + return at::_ops::all_dims::call(const_cast(*this), dim, keepdim); +} + +// aten::all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::all(at::Dimname dim, bool keepdim) const { + return at::_ops::all_dimname::call(const_cast(*this), dim, keepdim); +} + +// aten::allclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> bool +inline bool Tensor::allclose(const at::Tensor & other, double rtol, double atol, bool equal_nan) const { + return at::_ops::allclose::call(const_cast(*this), other, rtol, atol, equal_nan); +} + +// aten::any.dim(Tensor self, int dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::any(int64_t dim, bool keepdim) const { + return at::_ops::any_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::any(at::OptionalIntArrayRef dim, bool keepdim) const { + return at::_ops::any_dims::call(const_cast(*this), dim, keepdim); +} + +// aten::any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::any(at::Dimname dim, bool keepdim) const { + return at::_ops::any_dimname::call(const_cast(*this), dim, keepdim); +} + +// aten::argmax(Tensor self, int? dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::argmax(::std::optional dim, bool keepdim) const { + return at::_ops::argmax::call(const_cast(*this), dim, keepdim); +} + +// aten::argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::argmin(::std::optional dim, bool keepdim) const { + return at::_ops::argmin::call(const_cast(*this), dim, keepdim); +} + +// aten::acosh(Tensor self) -> Tensor +inline at::Tensor Tensor::acosh() const { + return at::_ops::acosh::call(const_cast(*this)); +} + +// aten::acosh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::acosh_() const { + return at::_ops::acosh_::call(const_cast(*this)); +} + +// aten::arccosh(Tensor self) -> Tensor +inline at::Tensor Tensor::arccosh() const { + return at::_ops::arccosh::call(const_cast(*this)); +} + +// aten::arccosh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arccosh_() const { + return at::_ops::arccosh_::call(const_cast(*this)); +} + +// aten::asinh(Tensor self) -> Tensor +inline at::Tensor Tensor::asinh() const { + return at::_ops::asinh::call(const_cast(*this)); +} + +// aten::asinh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::asinh_() const { + return at::_ops::asinh_::call(const_cast(*this)); +} + +// aten::arcsinh(Tensor self) -> Tensor +inline at::Tensor Tensor::arcsinh() const { + return at::_ops::arcsinh::call(const_cast(*this)); +} + +// aten::arcsinh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arcsinh_() const { + return at::_ops::arcsinh_::call(const_cast(*this)); +} + +// aten::atanh(Tensor self) -> Tensor +inline at::Tensor Tensor::atanh() const { + return at::_ops::atanh::call(const_cast(*this)); +} + +// aten::atanh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::atanh_() const { + return at::_ops::atanh_::call(const_cast(*this)); +} + +// aten::arctanh(Tensor self) -> Tensor +inline at::Tensor Tensor::arctanh() const { + return at::_ops::arctanh::call(const_cast(*this)); +} + +// aten::arctanh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arctanh_() const { + return at::_ops::arctanh_::call(const_cast(*this)); +} + +// aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a) +inline at::Tensor Tensor::as_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), storage_offset.has_value() ? ::std::make_optional(c10::SymInt(*storage_offset)) : ::std::nullopt); +} + +// aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a) +inline at::Tensor Tensor::as_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided::call(const_cast(*this), size, stride, storage_offset); +} + +// aten::as_strided_(Tensor(a!) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a!) +inline const at::Tensor & Tensor::as_strided_(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), storage_offset.has_value() ? ::std::make_optional(c10::SymInt(*storage_offset)) : ::std::nullopt); +} + +// aten::as_strided_(Tensor(a!) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a!) +inline const at::Tensor & Tensor::as_strided__symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_::call(const_cast(*this), size, stride, storage_offset); +} + +// aten::asin(Tensor self) -> Tensor +inline at::Tensor Tensor::asin() const { + return at::_ops::asin::call(const_cast(*this)); +} + +// aten::asin_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::asin_() const { + return at::_ops::asin_::call(const_cast(*this)); +} + +// aten::arcsin(Tensor self) -> Tensor +inline at::Tensor Tensor::arcsin() const { + return at::_ops::arcsin::call(const_cast(*this)); +} + +// aten::arcsin_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arcsin_() const { + return at::_ops::arcsin_::call(const_cast(*this)); +} + +// aten::atan(Tensor self) -> Tensor +inline at::Tensor Tensor::atan() const { + return at::_ops::atan::call(const_cast(*this)); +} + +// aten::atan_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::atan_() const { + return at::_ops::atan_::call(const_cast(*this)); +} + +// aten::arctan(Tensor self) -> Tensor +inline at::Tensor Tensor::arctan() const { + return at::_ops::arctan::call(const_cast(*this)); +} + +// aten::arctan_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arctan_() const { + return at::_ops::arctan_::call(const_cast(*this)); +} + +// aten::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::baddbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::baddbmm::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::baddbmm_(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::baddbmm_::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::bernoulli(Tensor self, *, Generator? generator=None) -> Tensor +inline at::Tensor Tensor::bernoulli(::std::optional generator) const { + return at::_ops::bernoulli::call(const_cast(*this), generator); +} + +// aten::bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::bernoulli_(const at::Tensor & p, ::std::optional generator) const { + return at::_ops::bernoulli__Tensor::call(const_cast(*this), p, generator); +} + +// aten::bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::bernoulli_(double p, ::std::optional generator) const { + return at::_ops::bernoulli__float::call(const_cast(*this), p, generator); +} + +// aten::bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor +inline at::Tensor Tensor::bernoulli(double p, ::std::optional generator) const { + return at::_ops::bernoulli_p::call(const_cast(*this), p, generator); +} + +// aten::bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor Tensor::bincount(const ::std::optional & weights, int64_t minlength) const { + return at::_ops::bincount::call(const_cast(*this), weights, minlength); +} + +// aten::bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor Tensor::bincount_symint(const ::std::optional & weights, c10::SymInt minlength) const { + return at::_ops::bincount::call(const_cast(*this), weights, minlength); +} + +// aten::bitwise_not(Tensor self) -> Tensor +inline at::Tensor Tensor::bitwise_not() const { + return at::_ops::bitwise_not::call(const_cast(*this)); +} + +// aten::bitwise_not_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_not_() const { + return at::_ops::bitwise_not_::call(const_cast(*this)); +} + +// aten::copysign.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::copysign(const at::Tensor & other) const { + return at::_ops::copysign_Tensor::call(const_cast(*this), other); +} + +// aten::copysign_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::copysign_(const at::Tensor & other) const { + return at::_ops::copysign__Tensor::call(const_cast(*this), other); +} + +// aten::copysign.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::copysign(const at::Scalar & other) const { + return at::_ops::copysign_Scalar::call(const_cast(*this), other); +} + +// aten::copysign_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::copysign_(const at::Scalar & other) const { + return at::_ops::copysign__Scalar::call(const_cast(*this), other); +} + +// aten::_lazy_clone(Tensor self) -> Tensor +inline at::Tensor Tensor::_lazy_clone() const { + return at::_ops::_lazy_clone::call(const_cast(*this)); +} + +// aten::logical_not(Tensor self) -> Tensor +inline at::Tensor Tensor::logical_not() const { + return at::_ops::logical_not::call(const_cast(*this)); +} + +// aten::logical_not_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::logical_not_() const { + return at::_ops::logical_not_::call(const_cast(*this)); +} + +// aten::logical_xor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logical_xor(const at::Tensor & other) const { + return at::_ops::logical_xor::call(const_cast(*this), other); +} + +// aten::logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::logical_xor_(const at::Tensor & other) const { + return at::_ops::logical_xor_::call(const_cast(*this), other); +} + +// aten::logical_and(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logical_and(const at::Tensor & other) const { + return at::_ops::logical_and::call(const_cast(*this), other); +} + +// aten::logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::logical_and_(const at::Tensor & other) const { + return at::_ops::logical_and_::call(const_cast(*this), other); +} + +// aten::logical_or(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logical_or(const at::Tensor & other) const { + return at::_ops::logical_or::call(const_cast(*this), other); +} + +// aten::logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::logical_or_(const at::Tensor & other) const { + return at::_ops::logical_or_::call(const_cast(*this), other); +} + +// aten::bmm(Tensor self, Tensor mat2) -> Tensor +inline at::Tensor Tensor::bmm(const at::Tensor & mat2) const { + return at::_ops::bmm::call(const_cast(*this), mat2); +} + +// aten::broadcast_to(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::broadcast_to(at::IntArrayRef size) const { + return at::_ops::broadcast_to::call(const_cast(*this), c10::fromIntArrayRefSlow(size)); +} + +// aten::broadcast_to(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::broadcast_to_symint(c10::SymIntArrayRef size) const { + return at::_ops::broadcast_to::call(const_cast(*this), size); +} + +// aten::ceil(Tensor self) -> Tensor +inline at::Tensor Tensor::ceil() const { + return at::_ops::ceil::call(const_cast(*this)); +} + +// aten::ceil_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::ceil_() const { + return at::_ops::ceil_::call(const_cast(*this)); +} + +// aten::unsafe_chunk(Tensor self, int chunks, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_chunk(int64_t chunks, int64_t dim) const { + return at::_ops::unsafe_chunk::call(const_cast(*this), chunks, dim); +} + +// aten::chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::chunk(int64_t chunks, int64_t dim) const { + return at::_ops::chunk::call(const_cast(*this), chunks, dim); +} + +// aten::tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split(int64_t sections, int64_t dim) const { + return at::_ops::tensor_split_sections::call(const_cast(*this), sections, dim); +} + +// aten::tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split_symint(c10::SymInt sections, int64_t dim) const { + return at::_ops::tensor_split_sections::call(const_cast(*this), sections, dim); +} + +// aten::tensor_split.indices(Tensor(a -> *) self, SymInt[] indices, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split(at::IntArrayRef indices, int64_t dim) const { + return at::_ops::tensor_split_indices::call(const_cast(*this), c10::fromIntArrayRefSlow(indices), dim); +} + +// aten::tensor_split.indices(Tensor(a -> *) self, SymInt[] indices, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split_symint(c10::SymIntArrayRef indices, int64_t dim) const { + return at::_ops::tensor_split_indices::call(const_cast(*this), indices, dim); +} + +// aten::tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split(const at::Tensor & tensor_indices_or_sections, int64_t dim) const { + return at::_ops::tensor_split_tensor_indices_or_sections::call(const_cast(*this), tensor_indices_or_sections, dim); +} + +// aten::clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor +inline at::Tensor Tensor::clamp(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp::call(const_cast(*this), min, max); +} + +// aten::clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor +inline at::Tensor Tensor::clamp(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp_Tensor::call(const_cast(*this), min, max); +} + +// aten::clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp_::call(const_cast(*this), min, max); +} + +// aten::clamp_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp__Tensor::call(const_cast(*this), min, max); +} + +// aten::clamp_max(Tensor self, Scalar max) -> Tensor +inline at::Tensor Tensor::clamp_max(const at::Scalar & max) const { + return at::_ops::clamp_max::call(const_cast(*this), max); +} + +// aten::clamp_max.Tensor(Tensor self, Tensor max) -> Tensor +inline at::Tensor Tensor::clamp_max(const at::Tensor & max) const { + return at::_ops::clamp_max_Tensor::call(const_cast(*this), max); +} + +// aten::clamp_max_(Tensor(a!) self, Scalar max) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_max_(const at::Scalar & max) const { + return at::_ops::clamp_max_::call(const_cast(*this), max); +} + +// aten::clamp_max_.Tensor(Tensor(a!) self, Tensor max) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_max_(const at::Tensor & max) const { + return at::_ops::clamp_max__Tensor::call(const_cast(*this), max); +} + +// aten::clamp_min(Tensor self, Scalar min) -> Tensor +inline at::Tensor Tensor::clamp_min(const at::Scalar & min) const { + return at::_ops::clamp_min::call(const_cast(*this), min); +} + +// aten::clamp_min.Tensor(Tensor self, Tensor min) -> Tensor +inline at::Tensor Tensor::clamp_min(const at::Tensor & min) const { + return at::_ops::clamp_min_Tensor::call(const_cast(*this), min); +} + +// aten::clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_min_(const at::Scalar & min) const { + return at::_ops::clamp_min_::call(const_cast(*this), min); +} + +// aten::clamp_min_.Tensor(Tensor(a!) self, Tensor min) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_min_(const at::Tensor & min) const { + return at::_ops::clamp_min__Tensor::call(const_cast(*this), min); +} + +// aten::clip(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor +inline at::Tensor Tensor::clip(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip::call(const_cast(*this), min, max); +} + +// aten::clip.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor +inline at::Tensor Tensor::clip(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip_Tensor::call(const_cast(*this), min, max); +} + +// aten::clip_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!) +inline at::Tensor & Tensor::clip_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip_::call(const_cast(*this), min, max); +} + +// aten::clip_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!) +inline at::Tensor & Tensor::clip_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip__Tensor::call(const_cast(*this), min, max); +} + +// aten::contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a) +inline at::Tensor Tensor::__dispatch_contiguous(at::MemoryFormat memory_format) const { + return at::_ops::contiguous::call(const_cast(*this), memory_format); +} + +// aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!) +inline at::Tensor & Tensor::copy_(const at::Tensor & src, bool non_blocking) const { + return at::_ops::copy_::call(const_cast(*this), src, non_blocking); +} + +// aten::cos(Tensor self) -> Tensor +inline at::Tensor Tensor::cos() const { + return at::_ops::cos::call(const_cast(*this)); +} + +// aten::cos_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::cos_() const { + return at::_ops::cos_::call(const_cast(*this)); +} + +// aten::cosh(Tensor self) -> Tensor +inline at::Tensor Tensor::cosh() const { + return at::_ops::cosh::call(const_cast(*this)); +} + +// aten::cosh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::cosh_() const { + return at::_ops::cosh_::call(const_cast(*this)); +} + +// aten::count_nonzero.dim_IntList(Tensor self, int[] dim) -> Tensor +inline at::Tensor Tensor::count_nonzero(at::IntArrayRef dim) const { + return at::_ops::count_nonzero_dim_IntList::call(const_cast(*this), dim); +} + +// aten::count_nonzero(Tensor self, int? dim=None) -> Tensor +inline at::Tensor Tensor::count_nonzero(::std::optional dim) const { + return at::_ops::count_nonzero::call(const_cast(*this), dim); +} + +// aten::cov(Tensor self, *, int correction=1, Tensor? fweights=None, Tensor? aweights=None) -> Tensor +inline at::Tensor Tensor::cov(int64_t correction, const ::std::optional & fweights, const ::std::optional & aweights) const { + return at::_ops::cov::call(const_cast(*this), correction, fweights, aweights); +} + +// aten::corrcoef(Tensor self) -> Tensor +inline at::Tensor Tensor::corrcoef() const { + return at::_ops::corrcoef::call(const_cast(*this)); +} + +// aten::cummax(Tensor self, int dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummax(int64_t dim) const { + return at::_ops::cummax::call(const_cast(*this), dim); +} + +// aten::cummax.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummax(at::Dimname dim) const { + return at::_ops::cummax_dimname::call(const_cast(*this), dim); +} + +// aten::cummin(Tensor self, int dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummin(int64_t dim) const { + return at::_ops::cummin::call(const_cast(*this), dim); +} + +// aten::cummin.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummin(at::Dimname dim) const { + return at::_ops::cummin_dimname::call(const_cast(*this), dim); +} + +// aten::cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumprod(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumprod::call(const_cast(*this), dim, dtype); +} + +// aten::cumprod_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumprod_(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumprod_::call(const_cast(*this), dim, dtype); +} + +// aten::cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumprod(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumprod_dimname::call(const_cast(*this), dim, dtype); +} + +// aten::cumprod_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumprod_(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumprod__dimname::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumsum(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumsum::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumsum_(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumsum_::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumsum(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumsum_dimname::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumsum_(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumsum__dimname::call(const_cast(*this), dim, dtype); +} + +// aten::diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor +inline at::Tensor Tensor::diag_embed(int64_t offset, int64_t dim1, int64_t dim2) const { + return at::_ops::diag_embed::call(const_cast(*this), offset, dim1, dim2); +} + +// aten::diagflat(Tensor self, int offset=0) -> Tensor +inline at::Tensor Tensor::diagflat(int64_t offset) const { + return at::_ops::diagflat::call(const_cast(*this), offset); +} + +// aten::diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a) +inline at::Tensor Tensor::diagonal(int64_t offset, int64_t dim1, int64_t dim2) const { + return at::_ops::diagonal::call(const_cast(*this), offset, dim1, dim2); +} + +// aten::diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a) +inline at::Tensor Tensor::diagonal(at::Dimname outdim, at::Dimname dim1, at::Dimname dim2, int64_t offset) const { + return at::_ops::diagonal_Dimname::call(const_cast(*this), outdim, dim1, dim2, offset); +} + +// aten::fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!) +inline at::Tensor & Tensor::fill_diagonal_(const at::Scalar & fill_value, bool wrap) const { + return at::_ops::fill_diagonal_::call(const_cast(*this), fill_value, wrap); +} + +// aten::diff(Tensor self, int n=1, int dim=-1, Tensor? prepend=None, Tensor? append=None) -> Tensor +inline at::Tensor Tensor::diff(int64_t n, int64_t dim, const ::std::optional & prepend, const ::std::optional & append) const { + return at::_ops::diff::call(const_cast(*this), n, dim, prepend, append); +} + +// aten::div.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::div(const at::Tensor & other) const { + return at::_ops::div_Tensor::call(const_cast(*this), other); +} + +// aten::div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::div_(const at::Tensor & other) const { + return at::_ops::div__Tensor::call(const_cast(*this), other); +} + +// aten::div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::div(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::div_Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!) +inline at::Tensor & Tensor::div_(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::div__Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::div.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::div(const at::Scalar & other) const { + return at::_ops::div_Scalar::call(const_cast(*this), other); +} + +// aten::div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::div_(const at::Scalar & other) const { + return at::_ops::div__Scalar::call(const_cast(*this), other); +} + +// aten::div.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::div(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::div_Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!) +inline at::Tensor & Tensor::div_(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::div__Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::divide(const at::Tensor & other) const { + return at::_ops::divide_Tensor::call(const_cast(*this), other); +} + +// aten::divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::divide_(const at::Tensor & other) const { + return at::_ops::divide__Tensor::call(const_cast(*this), other); +} + +// aten::divide.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::divide(const at::Scalar & other) const { + return at::_ops::divide_Scalar::call(const_cast(*this), other); +} + +// aten::divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::divide_(const at::Scalar & other) const { + return at::_ops::divide__Scalar::call(const_cast(*this), other); +} + +// aten::divide.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::divide(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::divide_Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!) +inline at::Tensor & Tensor::divide_(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::divide__Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::divide(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::divide_Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!) +inline at::Tensor & Tensor::divide_(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::divide__Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::true_divide.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::true_divide(const at::Tensor & other) const { + return at::_ops::true_divide_Tensor::call(const_cast(*this), other); +} + +// aten::true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::true_divide_(const at::Tensor & other) const { + return at::_ops::true_divide__Tensor::call(const_cast(*this), other); +} + +// aten::true_divide.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::true_divide(const at::Scalar & other) const { + return at::_ops::true_divide_Scalar::call(const_cast(*this), other); +} + +// aten::true_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::true_divide_(const at::Scalar & other) const { + return at::_ops::true_divide__Scalar::call(const_cast(*this), other); +} + +// aten::dot(Tensor self, Tensor tensor) -> Tensor +inline at::Tensor Tensor::dot(const at::Tensor & tensor) const { + return at::_ops::dot::call(const_cast(*this), tensor); +} + +// aten::vdot(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::vdot(const at::Tensor & other) const { + return at::_ops::vdot::call(const_cast(*this), other); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty(at::IntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_empty::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty::call(const_cast(*this), c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_symint(c10::SymIntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_empty::call(const_cast(*this), size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty::call(const_cast(*this), size, dtype, layout, device, pin_memory); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options) const { + return at::_ops::new_empty_strided::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty_strided::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype, layout, device, pin_memory); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options) const { + return at::_ops::new_empty_strided::call(const_cast(*this), size, stride, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty_strided::call(const_cast(*this), size, stride, dtype, layout, device, pin_memory); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full(at::IntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options) const { + return at::_ops::new_full::call(const_cast(*this), c10::fromIntArrayRefSlow(size), fill_value, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full(at::IntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_full::call(const_cast(*this), c10::fromIntArrayRefSlow(size), fill_value, dtype, layout, device, pin_memory); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options) const { + return at::_ops::new_full::call(const_cast(*this), size, fill_value, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_full::call(const_cast(*this), size, fill_value, dtype, layout, device, pin_memory); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros(at::IntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_zeros::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_zeros::call(const_cast(*this), c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros_symint(c10::SymIntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_zeros::call(const_cast(*this), size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_zeros::call(const_cast(*this), size, dtype, layout, device, pin_memory); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones(at::IntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_ones::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_ones::call(const_cast(*this), c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones_symint(c10::SymIntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_ones::call(const_cast(*this), size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_ones::call(const_cast(*this), size, dtype, layout, device, pin_memory); +} + +// aten::resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!) +inline const at::Tensor & Tensor::resize_(at::IntArrayRef size, ::std::optional memory_format) const { + return at::_ops::resize_::call(const_cast(*this), c10::fromIntArrayRefSlow(size), memory_format); +} + +// aten::resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!) +inline const at::Tensor & Tensor::resize__symint(c10::SymIntArrayRef size, ::std::optional memory_format) const { + return at::_ops::resize_::call(const_cast(*this), size, memory_format); +} + +// aten::erf(Tensor self) -> Tensor +inline at::Tensor Tensor::erf() const { + return at::_ops::erf::call(const_cast(*this)); +} + +// aten::erf_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::erf_() const { + return at::_ops::erf_::call(const_cast(*this)); +} + +// aten::erfc(Tensor self) -> Tensor +inline at::Tensor Tensor::erfc() const { + return at::_ops::erfc::call(const_cast(*this)); +} + +// aten::erfc_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::erfc_() const { + return at::_ops::erfc_::call(const_cast(*this)); +} + +// aten::exp(Tensor self) -> Tensor +inline at::Tensor Tensor::exp() const { + return at::_ops::exp::call(const_cast(*this)); +} + +// aten::exp_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::exp_() const { + return at::_ops::exp_::call(const_cast(*this)); +} + +// aten::exp2(Tensor self) -> Tensor +inline at::Tensor Tensor::exp2() const { + return at::_ops::exp2::call(const_cast(*this)); +} + +// aten::exp2_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::exp2_() const { + return at::_ops::exp2_::call(const_cast(*this)); +} + +// aten::expm1(Tensor self) -> Tensor +inline at::Tensor Tensor::expm1() const { + return at::_ops::expm1::call(const_cast(*this)); +} + +// aten::expm1_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::expm1_() const { + return at::_ops::expm1_::call(const_cast(*this)); +} + +// aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a) +inline at::Tensor Tensor::expand(at::IntArrayRef size, bool implicit) const { + return at::_ops::expand::call(const_cast(*this), c10::fromIntArrayRefSlow(size), implicit); +} + +// aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a) +inline at::Tensor Tensor::expand_symint(c10::SymIntArrayRef size, bool implicit) const { + return at::_ops::expand::call(const_cast(*this), size, implicit); +} + +// aten::expand_as(Tensor(a) self, Tensor other) -> Tensor(a) +inline at::Tensor Tensor::expand_as(const at::Tensor & other) const { + return at::_ops::expand_as::call(const_cast(*this), other); +} + +// aten::flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a) +inline at::Tensor Tensor::flatten(int64_t start_dim, int64_t end_dim) const { + return at::_ops::flatten_using_ints::call(const_cast(*this), start_dim, end_dim); +} + +// aten::flatten.named_out_dim(Tensor(a) self, int start_dim, int end_dim, Dimname out_dim) -> Tensor(a) +inline at::Tensor Tensor::flatten(int64_t start_dim, int64_t end_dim, at::Dimname out_dim) const { + return at::_ops::flatten_named_out_dim::call(const_cast(*this), start_dim, end_dim, out_dim); +} + +// aten::flatten.using_names(Tensor(a) self, Dimname start_dim, Dimname end_dim, Dimname out_dim) -> Tensor(a) +inline at::Tensor Tensor::flatten(at::Dimname start_dim, at::Dimname end_dim, at::Dimname out_dim) const { + return at::_ops::flatten_using_names::call(const_cast(*this), start_dim, end_dim, out_dim); +} + +// aten::flatten.DimnameList(Tensor(a) self, Dimname[] dims, Dimname out_dim) -> Tensor(a) +inline at::Tensor Tensor::flatten(at::DimnameList dims, at::Dimname out_dim) const { + return at::_ops::flatten_DimnameList::call(const_cast(*this), dims, out_dim); +} + +// aten::unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a) +inline at::Tensor Tensor::unflatten(int64_t dim, at::IntArrayRef sizes) const { + return at::_ops::unflatten_int::call(const_cast(*this), dim, c10::fromIntArrayRefSlow(sizes)); +} + +// aten::unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a) +inline at::Tensor Tensor::unflatten_symint(int64_t dim, c10::SymIntArrayRef sizes) const { + return at::_ops::unflatten_int::call(const_cast(*this), dim, sizes); +} + +// aten::unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::unflatten(at::Dimname dim, at::IntArrayRef sizes, at::DimnameList names) const { + return at::_ops::unflatten_Dimname::call(const_cast(*this), dim, c10::fromIntArrayRefSlow(sizes), names); +} + +// aten::unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::unflatten_symint(at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names) const { + return at::_ops::unflatten_Dimname::call(const_cast(*this), dim, sizes, names); +} + +// aten::fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::fill_(const at::Scalar & value) const { + return at::_ops::fill__Scalar::call(const_cast(*this), value); +} + +// aten::fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!) +inline at::Tensor & Tensor::fill_(const at::Tensor & value) const { + return at::_ops::fill__Tensor::call(const_cast(*this), value); +} + +// aten::floor(Tensor self) -> Tensor +inline at::Tensor Tensor::floor() const { + return at::_ops::floor::call(const_cast(*this)); +} + +// aten::floor_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::floor_() const { + return at::_ops::floor_::call(const_cast(*this)); +} + +// aten::floor_divide(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::floor_divide(const at::Tensor & other) const { + return at::_ops::floor_divide::call(const_cast(*this), other); +} + +// aten::floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::floor_divide_(const at::Tensor & other) const { + return at::_ops::floor_divide__Tensor::call(const_cast(*this), other); +} + +// aten::floor_divide.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::floor_divide(const at::Scalar & other) const { + return at::_ops::floor_divide_Scalar::call(const_cast(*this), other); +} + +// aten::floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::floor_divide_(const at::Scalar & other) const { + return at::_ops::floor_divide__Scalar::call(const_cast(*this), other); +} + +// aten::frac(Tensor self) -> Tensor +inline at::Tensor Tensor::frac() const { + return at::_ops::frac::call(const_cast(*this)); +} + +// aten::frac_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::frac_() const { + return at::_ops::frac_::call(const_cast(*this)); +} + +// aten::gcd(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::gcd(const at::Tensor & other) const { + return at::_ops::gcd::call(const_cast(*this), other); +} + +// aten::gcd_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::gcd_(const at::Tensor & other) const { + return at::_ops::gcd_::call(const_cast(*this), other); +} + +// aten::lcm(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::lcm(const at::Tensor & other) const { + return at::_ops::lcm::call(const_cast(*this), other); +} + +// aten::lcm_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::lcm_(const at::Tensor & other) const { + return at::_ops::lcm_::call(const_cast(*this), other); +} + +// aten::index.Tensor(Tensor self, Tensor?[] indices) -> Tensor +inline at::Tensor Tensor::index(const c10::List<::std::optional> & indices) const { + return at::_ops::index_Tensor::call(const_cast(*this), indices); +} + +// aten::index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!) +inline at::Tensor & Tensor::index_copy_(int64_t dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy_::call(const_cast(*this), dim, index, source); +} + +// aten::index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor +inline at::Tensor Tensor::index_copy(int64_t dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy::call(const_cast(*this), dim, index, source); +} + +// aten::index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!) +inline at::Tensor & Tensor::index_copy_(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy__dimname::call(const_cast(*this), dim, index, source); +} + +// aten::index_copy.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor +inline at::Tensor Tensor::index_copy(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy_dimname::call(const_cast(*this), dim, index, source); +} + +// aten::index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!) +inline at::Tensor & Tensor::index_put_(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate) const { + return at::_ops::index_put_::call(const_cast(*this), indices, values, accumulate); +} + +// aten::index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor +inline at::Tensor Tensor::index_put(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate) const { + return at::_ops::index_put::call(const_cast(*this), indices, values, accumulate); +} + +// aten::isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor +inline at::Tensor Tensor::isclose(const at::Tensor & other, double rtol, double atol, bool equal_nan) const { + return at::_ops::isclose::call(const_cast(*this), other, rtol, atol, equal_nan); +} + +// aten::isnan(Tensor self) -> Tensor +inline at::Tensor Tensor::isnan() const { + return at::_ops::isnan::call(const_cast(*this)); +} + +// aten::is_distributed(Tensor self) -> bool +inline bool Tensor::is_distributed() const { + return at::_ops::is_distributed::call(const_cast(*this)); +} + +// aten::is_floating_point(Tensor self) -> bool +inline bool Tensor::__dispatch_is_floating_point() const { + return at::_ops::is_floating_point::call(const_cast(*this)); +} + +// aten::is_complex(Tensor self) -> bool +inline bool Tensor::__dispatch_is_complex() const { + return at::_ops::is_complex::call(const_cast(*this)); +} + +// aten::is_conj(Tensor self) -> bool +inline bool Tensor::__dispatch_is_conj() const { + return at::_ops::is_conj::call(const_cast(*this)); +} + +// aten::_is_zerotensor(Tensor self) -> bool +inline bool Tensor::__dispatch__is_zerotensor() const { + return at::_ops::_is_zerotensor::call(const_cast(*this)); +} + +// aten::is_neg(Tensor self) -> bool +inline bool Tensor::__dispatch_is_neg() const { + return at::_ops::is_neg::call(const_cast(*this)); +} + +// aten::isreal(Tensor self) -> Tensor +inline at::Tensor Tensor::isreal() const { + return at::_ops::isreal::call(const_cast(*this)); +} + +// aten::is_nonzero(Tensor self) -> bool +inline bool Tensor::is_nonzero() const { + return at::_ops::is_nonzero::call(const_cast(*this)); +} + +// aten::is_same_size(Tensor self, Tensor other) -> bool +inline bool Tensor::is_same_size(const at::Tensor & other) const { + return at::_ops::is_same_size::call(const_cast(*this), other); +} + +// aten::is_signed(Tensor self) -> bool +inline bool Tensor::__dispatch_is_signed() const { + return at::_ops::is_signed::call(const_cast(*this)); +} + +// aten::is_inference(Tensor self) -> bool +inline bool Tensor::__dispatch_is_inference() const { + return at::_ops::is_inference::call(const_cast(*this)); +} + +// aten::kron(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::kron(const at::Tensor & other) const { + return at::_ops::kron::call(const_cast(*this), other); +} + +// aten::kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue(int64_t k, int64_t dim, bool keepdim) const { + return at::_ops::kthvalue::call(const_cast(*this), k, dim, keepdim); +} + +// aten::kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue_symint(c10::SymInt k, int64_t dim, bool keepdim) const { + return at::_ops::kthvalue::call(const_cast(*this), k, dim, keepdim); +} + +// aten::kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue(int64_t k, at::Dimname dim, bool keepdim) const { + return at::_ops::kthvalue_dimname::call(const_cast(*this), k, dim, keepdim); +} + +// aten::kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue_symint(c10::SymInt k, at::Dimname dim, bool keepdim) const { + return at::_ops::kthvalue_dimname::call(const_cast(*this), k, dim, keepdim); +} + +// aten::nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor +inline at::Tensor Tensor::nan_to_num(::std::optional nan, ::std::optional posinf, ::std::optional neginf) const { + return at::_ops::nan_to_num::call(const_cast(*this), nan, posinf, neginf); +} + +// aten::nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!) +inline at::Tensor & Tensor::nan_to_num_(::std::optional nan, ::std::optional posinf, ::std::optional neginf) const { + return at::_ops::nan_to_num_::call(const_cast(*this), nan, posinf, neginf); +} + +// aten::ldexp.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::ldexp(const at::Tensor & other) const { + return at::_ops::ldexp_Tensor::call(const_cast(*this), other); +} + +// aten::ldexp_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::ldexp_(const at::Tensor & other) const { + return at::_ops::ldexp_::call(const_cast(*this), other); +} + +// aten::log(Tensor self) -> Tensor +inline at::Tensor Tensor::log() const { + return at::_ops::log::call(const_cast(*this)); +} + +// aten::log_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::log_() const { + return at::_ops::log_::call(const_cast(*this)); +} + +// aten::log10(Tensor self) -> Tensor +inline at::Tensor Tensor::log10() const { + return at::_ops::log10::call(const_cast(*this)); +} + +// aten::log10_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::log10_() const { + return at::_ops::log10_::call(const_cast(*this)); +} + +// aten::log1p(Tensor self) -> Tensor +inline at::Tensor Tensor::log1p() const { + return at::_ops::log1p::call(const_cast(*this)); +} + +// aten::log1p_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::log1p_() const { + return at::_ops::log1p_::call(const_cast(*this)); +} + +// aten::log2(Tensor self) -> Tensor +inline at::Tensor Tensor::log2() const { + return at::_ops::log2::call(const_cast(*this)); +} + +// aten::log2_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::log2_() const { + return at::_ops::log2_::call(const_cast(*this)); +} + +// aten::logaddexp(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logaddexp(const at::Tensor & other) const { + return at::_ops::logaddexp::call(const_cast(*this), other); +} + +// aten::logaddexp2(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logaddexp2(const at::Tensor & other) const { + return at::_ops::logaddexp2::call(const_cast(*this), other); +} + +// aten::xlogy.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::xlogy(const at::Tensor & other) const { + return at::_ops::xlogy_Tensor::call(const_cast(*this), other); +} + +// aten::xlogy.Scalar_Other(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::xlogy(const at::Scalar & other) const { + return at::_ops::xlogy_Scalar_Other::call(const_cast(*this), other); +} + +// aten::xlogy_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::xlogy_(const at::Tensor & other) const { + return at::_ops::xlogy__Tensor::call(const_cast(*this), other); +} + +// aten::xlogy_.Scalar_Other(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::xlogy_(const at::Scalar & other) const { + return at::_ops::xlogy__Scalar_Other::call(const_cast(*this), other); +} + +// aten::log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::log_softmax(int64_t dim, ::std::optional dtype) const { + return at::_ops::log_softmax_int::call(const_cast(*this), dim, dtype); +} + +// aten::log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::log_softmax(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::log_softmax_Dimname::call(const_cast(*this), dim, dtype); +} + +// aten::logcumsumexp(Tensor self, int dim) -> Tensor +inline at::Tensor Tensor::logcumsumexp(int64_t dim) const { + return at::_ops::logcumsumexp::call(const_cast(*this), dim); +} + +// aten::logcumsumexp.dimname(Tensor self, Dimname dim) -> Tensor +inline at::Tensor Tensor::logcumsumexp(at::Dimname dim) const { + return at::_ops::logcumsumexp_dimname::call(const_cast(*this), dim); +} + +// aten::logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::logsumexp(at::IntArrayRef dim, bool keepdim) const { + return at::_ops::logsumexp::call(const_cast(*this), dim, keepdim); +} + +// aten::logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::logsumexp(at::DimnameList dim, bool keepdim) const { + return at::_ops::logsumexp_names::call(const_cast(*this), dim, keepdim); +} + +// aten::matmul(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::matmul(const at::Tensor & other) const { + return at::_ops::matmul::call(const_cast(*this), other); +} + +// aten::matrix_power(Tensor self, int n) -> Tensor +inline at::Tensor Tensor::matrix_power(int64_t n) const { + return at::_ops::matrix_power::call(const_cast(*this), n); +} + +// aten::matrix_exp(Tensor self) -> Tensor +inline at::Tensor Tensor::matrix_exp() const { + return at::_ops::matrix_exp::call(const_cast(*this)); +} + +// aten::aminmax(Tensor self, *, int? dim=None, bool keepdim=False) -> (Tensor min, Tensor max) +inline ::std::tuple Tensor::aminmax(::std::optional dim, bool keepdim) const { + return at::_ops::aminmax::call(const_cast(*this), dim, keepdim); +} + +// aten::max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::max(int64_t dim, bool keepdim) const { + return at::_ops::max_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::max(at::Dimname dim, bool keepdim) const { + return at::_ops::max_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor +inline at::Tensor Tensor::amax(at::IntArrayRef dim, bool keepdim) const { + return at::_ops::amax::call(const_cast(*this), dim, keepdim); +} + +// aten::mean(Tensor self, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::mean(::std::optional dtype) const { + return at::_ops::mean::call(const_cast(*this), dtype); +} + +// aten::mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::mean(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::mean_dim::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::mean(at::DimnameList dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::mean_names_dim::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::nanmean(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::nanmean(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::nanmean::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::median(Tensor self) -> Tensor +inline at::Tensor Tensor::median() const { + return at::_ops::median::call(const_cast(*this)); +} + +// aten::median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::median(int64_t dim, bool keepdim) const { + return at::_ops::median_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::median.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::median(at::Dimname dim, bool keepdim) const { + return at::_ops::median_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::nanmedian(Tensor self) -> Tensor +inline at::Tensor Tensor::nanmedian() const { + return at::_ops::nanmedian::call(const_cast(*this)); +} + +// aten::nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::nanmedian(int64_t dim, bool keepdim) const { + return at::_ops::nanmedian_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::nanmedian(at::Dimname dim, bool keepdim) const { + return at::_ops::nanmedian_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::min(int64_t dim, bool keepdim) const { + return at::_ops::min_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::min(at::Dimname dim, bool keepdim) const { + return at::_ops::min_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor +inline at::Tensor Tensor::amin(at::IntArrayRef dim, bool keepdim) const { + return at::_ops::amin::call(const_cast(*this), dim, keepdim); +} + +// aten::mm(Tensor self, Tensor mat2) -> Tensor +inline at::Tensor Tensor::mm(const at::Tensor & mat2) const { + return at::_ops::mm::call(const_cast(*this), mat2); +} + +// aten::mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::mode(int64_t dim, bool keepdim) const { + return at::_ops::mode::call(const_cast(*this), dim, keepdim); +} + +// aten::mode.dimname(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::mode(at::Dimname dim, bool keepdim) const { + return at::_ops::mode_dimname::call(const_cast(*this), dim, keepdim); +} + +// aten::mul.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::mul(const at::Tensor & other) const { + return at::_ops::mul_Tensor::call(const_cast(*this), other); +} + +// aten::mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::mul_(const at::Tensor & other) const { + return at::_ops::mul__Tensor::call(const_cast(*this), other); +} + +// aten::mul.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::mul(const at::Scalar & other) const { + return at::_ops::mul_Scalar::call(const_cast(*this), other); +} + +// aten::mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::mul_(const at::Scalar & other) const { + return at::_ops::mul__Scalar::call(const_cast(*this), other); +} + +// aten::multiply.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::multiply(const at::Tensor & other) const { + return at::_ops::multiply_Tensor::call(const_cast(*this), other); +} + +// aten::multiply_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::multiply_(const at::Tensor & other) const { + return at::_ops::multiply__Tensor::call(const_cast(*this), other); +} + +// aten::multiply.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::multiply(const at::Scalar & other) const { + return at::_ops::multiply_Scalar::call(const_cast(*this), other); +} + +// aten::multiply_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::multiply_(const at::Scalar & other) const { + return at::_ops::multiply__Scalar::call(const_cast(*this), other); +} + +// aten::mv(Tensor self, Tensor vec) -> Tensor +inline at::Tensor Tensor::mv(const at::Tensor & vec) const { + return at::_ops::mv::call(const_cast(*this), vec); +} + +// aten::mvlgamma(Tensor self, int p) -> Tensor +inline at::Tensor Tensor::mvlgamma(int64_t p) const { + return at::_ops::mvlgamma::call(const_cast(*this), p); +} + +// aten::mvlgamma_(Tensor(a!) self, int p) -> Tensor(a!) +inline at::Tensor & Tensor::mvlgamma_(int64_t p) const { + return at::_ops::mvlgamma_::call(const_cast(*this), p); +} + +// aten::narrow_copy(Tensor self, int dim, SymInt start, SymInt length) -> Tensor +inline at::Tensor Tensor::narrow_copy(int64_t dim, int64_t start, int64_t length) const { + return at::_ops::narrow_copy::call(const_cast(*this), dim, start, length); +} + +// aten::narrow_copy(Tensor self, int dim, SymInt start, SymInt length) -> Tensor +inline at::Tensor Tensor::narrow_copy_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const { + return at::_ops::narrow_copy::call(const_cast(*this), dim, start, length); +} + +// aten::narrow(Tensor(a) self, int dim, SymInt start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow(int64_t dim, int64_t start, int64_t length) const { + return at::_ops::narrow::call(const_cast(*this), dim, start, length); +} + +// aten::narrow(Tensor(a) self, int dim, SymInt start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const { + return at::_ops::narrow::call(const_cast(*this), dim, start, length); +} + +// aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow(int64_t dim, const at::Tensor & start, int64_t length) const { + return at::_ops::narrow_Tensor::call(const_cast(*this), dim, start, length); +} + +// aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow_symint(int64_t dim, const at::Tensor & start, c10::SymInt length) const { + return at::_ops::narrow_Tensor::call(const_cast(*this), dim, start, length); +} + +// aten::permute(Tensor(a) self, int[] dims) -> Tensor(a) +inline at::Tensor Tensor::permute(at::IntArrayRef dims) const { + return at::_ops::permute::call(const_cast(*this), dims); +} + +// aten::movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a) +inline at::Tensor Tensor::movedim(at::IntArrayRef source, at::IntArrayRef destination) const { + return at::_ops::movedim_intlist::call(const_cast(*this), source, destination); +} + +// aten::movedim.int(Tensor(a) self, int source, int destination) -> Tensor(a) +inline at::Tensor Tensor::movedim(int64_t source, int64_t destination) const { + return at::_ops::movedim_int::call(const_cast(*this), source, destination); +} + +// aten::moveaxis.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a) +inline at::Tensor Tensor::moveaxis(at::IntArrayRef source, at::IntArrayRef destination) const { + return at::_ops::moveaxis_intlist::call(const_cast(*this), source, destination); +} + +// aten::moveaxis.int(Tensor(a) self, int source, int destination) -> Tensor(a) +inline at::Tensor Tensor::moveaxis(int64_t source, int64_t destination) const { + return at::_ops::moveaxis_int::call(const_cast(*this), source, destination); +} + +// aten::numpy_T(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::numpy_T() const { + return at::_ops::numpy_T::call(const_cast(*this)); +} + +// aten::matrix_H(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::matrix_H() const { + return at::_ops::matrix_H::call(const_cast(*this)); +} + +// aten::mT(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::mT() const { + return at::_ops::mT::call(const_cast(*this)); +} + +// aten::mH(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::mH() const { + return at::_ops::mH::call(const_cast(*this)); +} + +// aten::adjoint(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::adjoint() const { + return at::_ops::adjoint::call(const_cast(*this)); +} + +// aten::is_pinned(Tensor self, Device? device=None) -> bool +inline bool Tensor::is_pinned(::std::optional device) const { + return at::_ops::is_pinned::call(const_cast(*this), device); +} + +// aten::pin_memory(Tensor(a) self, Device? device=None) -> Tensor(a) +inline at::Tensor Tensor::pin_memory(::std::optional device) const { + return at::_ops::pin_memory::call(const_cast(*this), device); +} + +// aten::pinverse(Tensor self, float rcond=1e-15) -> Tensor +inline at::Tensor Tensor::pinverse(double rcond) const { + return at::_ops::pinverse::call(const_cast(*this), rcond); +} + +// aten::rad2deg(Tensor self) -> Tensor +inline at::Tensor Tensor::rad2deg() const { + return at::_ops::rad2deg::call(const_cast(*this)); +} + +// aten::rad2deg_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::rad2deg_() const { + return at::_ops::rad2deg_::call(const_cast(*this)); +} + +// aten::deg2rad(Tensor self) -> Tensor +inline at::Tensor Tensor::deg2rad() const { + return at::_ops::deg2rad::call(const_cast(*this)); +} + +// aten::deg2rad_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::deg2rad_() const { + return at::_ops::deg2rad_::call(const_cast(*this)); +} + +// aten::ravel(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::ravel() const { + return at::_ops::ravel::call(const_cast(*this)); +} + +// aten::reciprocal(Tensor self) -> Tensor +inline at::Tensor Tensor::reciprocal() const { + return at::_ops::reciprocal::call(const_cast(*this)); +} + +// aten::reciprocal_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::reciprocal_() const { + return at::_ops::reciprocal_::call(const_cast(*this)); +} + +// aten::neg(Tensor self) -> Tensor +inline at::Tensor Tensor::neg() const { + return at::_ops::neg::call(const_cast(*this)); +} + +// aten::neg_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::neg_() const { + return at::_ops::neg_::call(const_cast(*this)); +} + +// aten::negative(Tensor self) -> Tensor +inline at::Tensor Tensor::negative() const { + return at::_ops::negative::call(const_cast(*this)); +} + +// aten::negative_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::negative_() const { + return at::_ops::negative_::call(const_cast(*this)); +} + +// aten::repeat(Tensor self, SymInt[] repeats) -> Tensor +inline at::Tensor Tensor::repeat(at::IntArrayRef repeats) const { + return at::_ops::repeat::call(const_cast(*this), c10::fromIntArrayRefSlow(repeats)); +} + +// aten::repeat(Tensor self, SymInt[] repeats) -> Tensor +inline at::Tensor Tensor::repeat_symint(c10::SymIntArrayRef repeats) const { + return at::_ops::repeat::call(const_cast(*this), repeats); +} + +// aten::repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave(const at::Tensor & repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_Tensor::call(const_cast(*this), repeats, dim, output_size.has_value() ? ::std::make_optional(c10::SymInt(*output_size)) : ::std::nullopt); +} + +// aten::repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave_symint(const at::Tensor & repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_Tensor::call(const_cast(*this), repeats, dim, output_size); +} + +// aten::repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave(int64_t repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_int::call(const_cast(*this), repeats, dim, output_size.has_value() ? ::std::make_optional(c10::SymInt(*output_size)) : ::std::nullopt); +} + +// aten::repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave_symint(c10::SymInt repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_int::call(const_cast(*this), repeats, dim, output_size); +} + +// aten::reshape(Tensor(a) self, SymInt[] shape) -> Tensor(a) +inline at::Tensor Tensor::reshape(at::IntArrayRef shape) const { + return at::_ops::reshape::call(const_cast(*this), c10::fromIntArrayRefSlow(shape)); +} + +// aten::reshape(Tensor(a) self, SymInt[] shape) -> Tensor(a) +inline at::Tensor Tensor::reshape_symint(c10::SymIntArrayRef shape) const { + return at::_ops::reshape::call(const_cast(*this), shape); +} + +// aten::_reshape_alias(Tensor(a) self, SymInt[] size, SymInt[] stride) -> Tensor(a) +inline at::Tensor Tensor::_reshape_alias(at::IntArrayRef size, at::IntArrayRef stride) const { + return at::_ops::_reshape_alias::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride)); +} + +// aten::_reshape_alias(Tensor(a) self, SymInt[] size, SymInt[] stride) -> Tensor(a) +inline at::Tensor Tensor::_reshape_alias_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const { + return at::_ops::_reshape_alias::call(const_cast(*this), size, stride); +} + +// aten::reshape_as(Tensor(a) self, Tensor other) -> Tensor(a) +inline at::Tensor Tensor::reshape_as(const at::Tensor & other) const { + return at::_ops::reshape_as::call(const_cast(*this), other); +} + +// aten::round(Tensor self) -> Tensor +inline at::Tensor Tensor::round() const { + return at::_ops::round::call(const_cast(*this)); +} + +// aten::round_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::round_() const { + return at::_ops::round_::call(const_cast(*this)); +} + +// aten::round.decimals(Tensor self, *, int decimals) -> Tensor +inline at::Tensor Tensor::round(int64_t decimals) const { + return at::_ops::round_decimals::call(const_cast(*this), decimals); +} + +// aten::round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!) +inline at::Tensor & Tensor::round_(int64_t decimals) const { + return at::_ops::round__decimals::call(const_cast(*this), decimals); +} + +// aten::relu(Tensor self) -> Tensor +inline at::Tensor Tensor::relu() const { + return at::_ops::relu::call(const_cast(*this)); +} + +// aten::relu_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::relu_() const { + return at::_ops::relu_::call(const_cast(*this)); +} + +// aten::prelu(Tensor self, Tensor weight) -> Tensor +inline at::Tensor Tensor::prelu(const at::Tensor & weight) const { + return at::_ops::prelu::call(const_cast(*this), weight); +} + +// aten::hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor +inline at::Tensor Tensor::hardshrink(const at::Scalar & lambd) const { + return at::_ops::hardshrink::call(const_cast(*this), lambd); +} + +// aten::hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor +inline at::Tensor Tensor::hardshrink_backward(const at::Tensor & grad_out, const at::Scalar & lambd) const { + return at::_ops::hardshrink_backward::call(grad_out, const_cast(*this), lambd); +} + +// aten::rsqrt(Tensor self) -> Tensor +inline at::Tensor Tensor::rsqrt() const { + return at::_ops::rsqrt::call(const_cast(*this)); +} + +// aten::rsqrt_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::rsqrt_() const { + return at::_ops::rsqrt_::call(const_cast(*this)); +} + +// aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a) +inline at::Tensor Tensor::select(at::Dimname dim, int64_t index) const { + return at::_ops::select_Dimname::call(const_cast(*this), dim, index); +} + +// aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a) +inline at::Tensor Tensor::select(int64_t dim, int64_t index) const { + return at::_ops::select_int::call(const_cast(*this), dim, index); +} + +// aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a) +inline at::Tensor Tensor::select_symint(int64_t dim, c10::SymInt index) const { + return at::_ops::select_int::call(const_cast(*this), dim, index); +} + +// aten::sigmoid(Tensor self) -> Tensor +inline at::Tensor Tensor::sigmoid() const { + return at::_ops::sigmoid::call(const_cast(*this)); +} + +// aten::sigmoid_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sigmoid_() const { + return at::_ops::sigmoid_::call(const_cast(*this)); +} + +// aten::logit(Tensor self, float? eps=None) -> Tensor +inline at::Tensor Tensor::logit(::std::optional eps) const { + return at::_ops::logit::call(const_cast(*this), eps); +} + +// aten::logit_(Tensor(a!) self, float? eps=None) -> Tensor(a!) +inline at::Tensor & Tensor::logit_(::std::optional eps) const { + return at::_ops::logit_::call(const_cast(*this), eps); +} + +// aten::sin(Tensor self) -> Tensor +inline at::Tensor Tensor::sin() const { + return at::_ops::sin::call(const_cast(*this)); +} + +// aten::sin_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sin_() const { + return at::_ops::sin_::call(const_cast(*this)); +} + +// aten::sinc(Tensor self) -> Tensor +inline at::Tensor Tensor::sinc() const { + return at::_ops::sinc::call(const_cast(*this)); +} + +// aten::sinc_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sinc_() const { + return at::_ops::sinc_::call(const_cast(*this)); +} + +// aten::sinh(Tensor self) -> Tensor +inline at::Tensor Tensor::sinh() const { + return at::_ops::sinh::call(const_cast(*this)); +} + +// aten::sinh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sinh_() const { + return at::_ops::sinh_::call(const_cast(*this)); +} + +// aten::detach(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::detach() const { + return at::_ops::detach::call(const_cast(*this)); +} + +// aten::detach_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::detach_() const { + return at::_ops::detach_::call(const_cast(*this)); +} + +// aten::size.Dimname(Tensor self, Dimname dim) -> int +inline int64_t Tensor::size(at::Dimname dim) const { + return at::_ops::size_Dimname::call(const_cast(*this), dim); +} + +// aten::slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice(int64_t dim, ::std::optional start, ::std::optional end, int64_t step) const { + return at::_ops::slice_Tensor::call(const_cast(*this), dim, start.has_value() ? ::std::make_optional(c10::SymInt(*start)) : ::std::nullopt, end.has_value() ? ::std::make_optional(c10::SymInt(*end)) : ::std::nullopt, step); +} + +// aten::slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice_symint(int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step) const { + return at::_ops::slice_Tensor::call(const_cast(*this), dim, start, end, step); +} + +// aten::slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice_inverse(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, int64_t step) const { + return at::_ops::slice_inverse::call(const_cast(*this), src, dim, start.has_value() ? ::std::make_optional(c10::SymInt(*start)) : ::std::nullopt, end.has_value() ? ::std::make_optional(c10::SymInt(*end)) : ::std::nullopt, step); +} + +// aten::slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice_inverse_symint(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step) const { + return at::_ops::slice_inverse::call(const_cast(*this), src, dim, start, end, step); +} + +// aten::slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor +inline at::Tensor Tensor::slice_scatter(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, int64_t step) const { + return at::_ops::slice_scatter::call(const_cast(*this), src, dim, start.has_value() ? ::std::make_optional(c10::SymInt(*start)) : ::std::nullopt, end.has_value() ? ::std::make_optional(c10::SymInt(*end)) : ::std::nullopt, step); +} + +// aten::slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor +inline at::Tensor Tensor::slice_scatter_symint(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step) const { + return at::_ops::slice_scatter::call(const_cast(*this), src, dim, start, end, step); +} + +// aten::select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor +inline at::Tensor Tensor::select_scatter(const at::Tensor & src, int64_t dim, int64_t index) const { + return at::_ops::select_scatter::call(const_cast(*this), src, dim, index); +} + +// aten::select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor +inline at::Tensor Tensor::select_scatter_symint(const at::Tensor & src, int64_t dim, c10::SymInt index) const { + return at::_ops::select_scatter::call(const_cast(*this), src, dim, index); +} + +// aten::diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor +inline at::Tensor Tensor::diagonal_scatter(const at::Tensor & src, int64_t offset, int64_t dim1, int64_t dim2) const { + return at::_ops::diagonal_scatter::call(const_cast(*this), src, offset, dim1, dim2); +} + +// aten::as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor +inline at::Tensor Tensor::as_strided_scatter(const at::Tensor & src, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_scatter::call(const_cast(*this), src, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), storage_offset.has_value() ? ::std::make_optional(c10::SymInt(*storage_offset)) : ::std::nullopt); +} + +// aten::as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor +inline at::Tensor Tensor::as_strided_scatter_symint(const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_scatter::call(const_cast(*this), src, size, stride, storage_offset); +} + +// aten::smm(Tensor self, Tensor mat2) -> Tensor +inline at::Tensor Tensor::smm(const at::Tensor & mat2) const { + return at::_ops::smm::call(const_cast(*this), mat2); +} + +// aten::softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::softmax(int64_t dim, ::std::optional dtype) const { + return at::_ops::softmax_int::call(const_cast(*this), dim, dtype); +} + +// aten::softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::softmax(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::softmax_Dimname::call(const_cast(*this), dim, dtype); +} + +// aten::unsafe_split.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split(int64_t split_size, int64_t dim) const { + return at::_ops::unsafe_split_Tensor::call(const_cast(*this), split_size, dim); +} + +// aten::unsafe_split.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split_symint(c10::SymInt split_size, int64_t dim) const { + return at::_ops::unsafe_split_Tensor::call(const_cast(*this), split_size, dim); +} + +// aten::split.Tensor(Tensor(a -> *) self, SymInt split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split(int64_t split_size, int64_t dim) const { + return at::_ops::split_Tensor::call(const_cast(*this), split_size, dim); +} + +// aten::split.Tensor(Tensor(a -> *) self, SymInt split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_symint(c10::SymInt split_size, int64_t dim) const { + return at::_ops::split_Tensor::call(const_cast(*this), split_size, dim); +} + +// aten::split.sizes(Tensor(a -> *) self, SymInt[] split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split(at::IntArrayRef split_size, int64_t dim) const { + return at::_ops::split_sizes::call(const_cast(*this), c10::fromIntArrayRefSlow(split_size), dim); +} + +// aten::split.sizes(Tensor(a -> *) self, SymInt[] split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_symint(c10::SymIntArrayRef split_size, int64_t dim) const { + return at::_ops::split_sizes::call(const_cast(*this), split_size, dim); +} + +// aten::unsafe_split_with_sizes(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split_with_sizes(at::IntArrayRef split_sizes, int64_t dim) const { + return at::_ops::unsafe_split_with_sizes::call(const_cast(*this), c10::fromIntArrayRefSlow(split_sizes), dim); +} + +// aten::unsafe_split_with_sizes(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim) const { + return at::_ops::unsafe_split_with_sizes::call(const_cast(*this), split_sizes, dim); +} + +// aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_with_sizes(at::IntArrayRef split_sizes, int64_t dim) const { + return at::_ops::split_with_sizes::call(const_cast(*this), c10::fromIntArrayRefSlow(split_sizes), dim); +} + +// aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim) const { + return at::_ops::split_with_sizes::call(const_cast(*this), split_sizes, dim); +} + +// aten::hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[] +inline ::std::vector Tensor::hsplit(int64_t sections) const { + return at::_ops::hsplit_int::call(const_cast(*this), sections); +} + +// aten::hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[] +inline ::std::vector Tensor::hsplit(at::IntArrayRef indices) const { + return at::_ops::hsplit_array::call(const_cast(*this), indices); +} + +// aten::vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[] +inline ::std::vector Tensor::vsplit(int64_t sections) const { + return at::_ops::vsplit_int::call(const_cast(*this), sections); +} + +// aten::vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[] +inline ::std::vector Tensor::vsplit(at::IntArrayRef indices) const { + return at::_ops::vsplit_array::call(const_cast(*this), indices); +} + +// aten::dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[] +inline ::std::vector Tensor::dsplit(int64_t sections) const { + return at::_ops::dsplit_int::call(const_cast(*this), sections); +} + +// aten::dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[] +inline ::std::vector Tensor::dsplit(at::IntArrayRef indices) const { + return at::_ops::dsplit_array::call(const_cast(*this), indices); +} + +// aten::squeeze(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::squeeze() const { + return at::_ops::squeeze::call(const_cast(*this)); +} + +// aten::squeeze.dim(Tensor(a) self, int dim) -> Tensor(a) +inline at::Tensor Tensor::squeeze(int64_t dim) const { + return at::_ops::squeeze_dim::call(const_cast(*this), dim); +} + +// aten::squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a) +inline at::Tensor Tensor::squeeze(at::Dimname dim) const { + return at::_ops::squeeze_dimname::call(const_cast(*this), dim); +} + +// aten::squeeze.dims(Tensor(a) self, int[] dim) -> Tensor(a) +inline at::Tensor Tensor::squeeze(at::IntArrayRef dim) const { + return at::_ops::squeeze_dims::call(const_cast(*this), dim); +} + +// aten::squeeze_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::squeeze_() const { + return at::_ops::squeeze_::call(const_cast(*this)); +} + +// aten::squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!) +inline at::Tensor & Tensor::squeeze_(int64_t dim) const { + return at::_ops::squeeze__dim::call(const_cast(*this), dim); +} + +// aten::squeeze_.dims(Tensor(a!) self, int[] dim) -> Tensor(a!) +inline at::Tensor & Tensor::squeeze_(at::IntArrayRef dim) const { + return at::_ops::squeeze__dims::call(const_cast(*this), dim); +} + +// aten::squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!) +inline at::Tensor & Tensor::squeeze_(at::Dimname dim) const { + return at::_ops::squeeze__dimname::call(const_cast(*this), dim); +} + +// aten::sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::sspaddmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::sspaddmm::call(const_cast(*this), mat1, mat2, beta, alpha); +} + +// aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor +inline at::Tensor Tensor::stft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided, ::std::optional return_complex, ::std::optional align_to_window) const { + return at::_ops::stft::call(const_cast(*this), n_fft, hop_length, win_length, window, normalized, onesided, return_complex, align_to_window); +} + +// aten::stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor +inline at::Tensor Tensor::stft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool center, c10::string_view pad_mode, bool normalized, ::std::optional onesided, ::std::optional return_complex, ::std::optional align_to_window) const { + return at::_ops::stft_center::call(const_cast(*this), n_fft, hop_length, win_length, window, center, pad_mode, normalized, onesided, return_complex, align_to_window); +} + +// aten::istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor +inline at::Tensor Tensor::istft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool center, bool normalized, ::std::optional onesided, ::std::optional length, bool return_complex) const { + return at::_ops::istft::call(const_cast(*this), n_fft, hop_length, win_length, window, center, normalized, onesided, length, return_complex); +} + +// aten::stride.Dimname(Tensor self, Dimname dim) -> int +inline int64_t Tensor::stride(at::Dimname dim) const { + return at::_ops::stride_Dimname::call(const_cast(*this), dim); +} + +// aten::sum(Tensor self, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::sum(::std::optional dtype) const { + return at::_ops::sum::call(const_cast(*this), dtype); +} + +// aten::sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::sum(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::sum_dim_IntList::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::sum(at::DimnameList dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::sum_dim_DimnameList::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::nansum(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::nansum(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::nansum::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor +inline at::Tensor Tensor::hash_tensor(at::IntArrayRef dim, bool keepdim, int64_t mode) const { + return at::_ops::hash_tensor::call(const_cast(*this), dim, keepdim, mode); +} + +// aten::sum_to_size(Tensor self, SymInt[] size) -> Tensor +inline at::Tensor Tensor::sum_to_size(at::IntArrayRef size) const { + return at::_ops::sum_to_size::call(const_cast(*this), c10::fromIntArrayRefSlow(size)); +} + +// aten::sum_to_size(Tensor self, SymInt[] size) -> Tensor +inline at::Tensor Tensor::sum_to_size_symint(c10::SymIntArrayRef size) const { + return at::_ops::sum_to_size::call(const_cast(*this), size); +} + +// aten::sqrt(Tensor self) -> Tensor +inline at::Tensor Tensor::sqrt() const { + return at::_ops::sqrt::call(const_cast(*this)); +} + +// aten::sqrt_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sqrt_() const { + return at::_ops::sqrt_::call(const_cast(*this)); +} + +// aten::square(Tensor self) -> Tensor +inline at::Tensor Tensor::square() const { + return at::_ops::square::call(const_cast(*this)); +} + +// aten::square_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::square_() const { + return at::_ops::square_::call(const_cast(*this)); +} + +// aten::std(Tensor self, bool unbiased=True) -> Tensor +inline at::Tensor Tensor::std(bool unbiased) const { + return at::_ops::std::call(const_cast(*this), unbiased); +} + +// aten::std.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::OptionalIntArrayRef dim, bool unbiased, bool keepdim) const { + return at::_ops::std_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::std_correction::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::DimnameList dim, bool unbiased, bool keepdim) const { + return at::_ops::std_names_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::std.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::DimnameList dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::std_correction_names::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::prod(Tensor self, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::prod(::std::optional dtype) const { + return at::_ops::prod::call(const_cast(*this), dtype); +} + +// aten::prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::prod(int64_t dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::prod_dim_int::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::prod(at::Dimname dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::prod_dim_Dimname::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::t(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::t() const { + return at::_ops::t::call(const_cast(*this)); +} + +// aten::t_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::t_() const { + return at::_ops::t_::call(const_cast(*this)); +} + +// aten::tan(Tensor self) -> Tensor +inline at::Tensor Tensor::tan() const { + return at::_ops::tan::call(const_cast(*this)); +} + +// aten::tan_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::tan_() const { + return at::_ops::tan_::call(const_cast(*this)); +} + +// aten::tanh(Tensor self) -> Tensor +inline at::Tensor Tensor::tanh() const { + return at::_ops::tanh::call(const_cast(*this)); +} + +// aten::tanh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::tanh_() const { + return at::_ops::tanh_::call(const_cast(*this)); +} + +// aten::tile(Tensor self, SymInt[] dims) -> Tensor +inline at::Tensor Tensor::tile(at::IntArrayRef dims) const { + return at::_ops::tile::call(const_cast(*this), c10::fromIntArrayRefSlow(dims)); +} + +// aten::tile(Tensor self, SymInt[] dims) -> Tensor +inline at::Tensor Tensor::tile_symint(c10::SymIntArrayRef dims) const { + return at::_ops::tile::call(const_cast(*this), dims); +} + +// aten::transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a) +inline at::Tensor Tensor::transpose(int64_t dim0, int64_t dim1) const { + return at::_ops::transpose_int::call(const_cast(*this), dim0, dim1); +} + +// aten::transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a) +inline at::Tensor Tensor::transpose(at::Dimname dim0, at::Dimname dim1) const { + return at::_ops::transpose_Dimname::call(const_cast(*this), dim0, dim1); +} + +// aten::transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!) +inline at::Tensor & Tensor::transpose_(int64_t dim0, int64_t dim1) const { + return at::_ops::transpose_::call(const_cast(*this), dim0, dim1); +} + +// aten::flip(Tensor self, int[] dims) -> Tensor +inline at::Tensor Tensor::flip(at::IntArrayRef dims) const { + return at::_ops::flip::call(const_cast(*this), dims); +} + +// aten::fliplr(Tensor self) -> Tensor +inline at::Tensor Tensor::fliplr() const { + return at::_ops::fliplr::call(const_cast(*this)); +} + +// aten::flipud(Tensor self) -> Tensor +inline at::Tensor Tensor::flipud() const { + return at::_ops::flipud::call(const_cast(*this)); +} + +// aten::roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor +inline at::Tensor Tensor::roll(at::IntArrayRef shifts, at::IntArrayRef dims) const { + return at::_ops::roll::call(const_cast(*this), c10::fromIntArrayRefSlow(shifts), dims); +} + +// aten::roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor +inline at::Tensor Tensor::roll_symint(c10::SymIntArrayRef shifts, at::IntArrayRef dims) const { + return at::_ops::roll::call(const_cast(*this), shifts, dims); +} + +// aten::rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor +inline at::Tensor Tensor::rot90(int64_t k, at::IntArrayRef dims) const { + return at::_ops::rot90::call(const_cast(*this), k, dims); +} + +// aten::_nested_tensor_size(Tensor self) -> Tensor +inline at::Tensor Tensor::_nested_tensor_size() const { + return at::_ops::_nested_tensor_size::call(const_cast(*this)); +} + +// aten::_nested_tensor_strides(Tensor self) -> Tensor +inline at::Tensor Tensor::_nested_tensor_strides() const { + return at::_ops::_nested_tensor_strides::call(const_cast(*this)); +} + +// aten::_nested_tensor_storage_offsets(Tensor self) -> Tensor +inline at::Tensor Tensor::_nested_tensor_storage_offsets() const { + return at::_ops::_nested_tensor_storage_offsets::call(const_cast(*this)); +} + +// aten::trunc(Tensor self) -> Tensor +inline at::Tensor Tensor::trunc() const { + return at::_ops::trunc::call(const_cast(*this)); +} + +// aten::trunc_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::trunc_() const { + return at::_ops::trunc_::call(const_cast(*this)); +} + +// aten::fix(Tensor self) -> Tensor +inline at::Tensor Tensor::fix() const { + return at::_ops::fix::call(const_cast(*this)); +} + +// aten::fix_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::fix_() const { + return at::_ops::fix_::call(const_cast(*this)); +} + +// aten::type_as(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::type_as(const at::Tensor & other) const { + return at::_ops::type_as::call(const_cast(*this), other); +} + +// aten::unsqueeze(Tensor(a) self, int dim) -> Tensor(a) +inline at::Tensor Tensor::unsqueeze(int64_t dim) const { + return at::_ops::unsqueeze::call(const_cast(*this), dim); +} + +// aten::unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!) +inline at::Tensor & Tensor::unsqueeze_(int64_t dim) const { + return at::_ops::unsqueeze_::call(const_cast(*this), dim); +} + +// aten::var(Tensor self, bool unbiased=True) -> Tensor +inline at::Tensor Tensor::var(bool unbiased) const { + return at::_ops::var::call(const_cast(*this), unbiased); +} + +// aten::var.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::OptionalIntArrayRef dim, bool unbiased, bool keepdim) const { + return at::_ops::var_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::var.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::var_correction::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::DimnameList dim, bool unbiased, bool keepdim) const { + return at::_ops::var_names_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::var.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::DimnameList dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::var_correction_names::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::view_as(Tensor(a) self, Tensor other) -> Tensor(a) +inline at::Tensor Tensor::view_as(const at::Tensor & other) const { + return at::_ops::view_as::call(const_cast(*this), other); +} + +// aten::where.self(Tensor condition, Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::where(const at::Tensor & condition, const at::Tensor & other) const { + return at::_ops::where_self::call(condition, const_cast(*this), other); +} + +// aten::where.ScalarOther(Tensor condition, Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::where(const at::Tensor & condition, const at::Scalar & other) const { + return at::_ops::where_ScalarOther::call(condition, const_cast(*this), other); +} + +// aten::norm.ScalarOpt_dtype(Tensor self, Scalar? p, *, ScalarType dtype) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::ScalarType dtype) const { + return at::_ops::norm_ScalarOpt_dtype::call(const_cast(*this), p, dtype); +} + +// aten::norm.Scalar(Tensor self, Scalar p=2) -> Tensor +inline at::Tensor Tensor::norm(const at::Scalar & p) const { + return at::_ops::norm_Scalar::call(const_cast(*this), p); +} + +// aten::norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim, at::ScalarType dtype) const { + return at::_ops::norm_ScalarOpt_dim_dtype::call(const_cast(*this), p, dim, keepdim, dtype); +} + +// aten::norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim) const { + return at::_ops::norm_ScalarOpt_dim::call(const_cast(*this), p, dim, keepdim); +} + +// aten::norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::DimnameList dim, bool keepdim, at::ScalarType dtype) const { + return at::_ops::norm_names_ScalarOpt_dim_dtype::call(const_cast(*this), p, dim, keepdim, dtype); +} + +// aten::norm.names_ScalarOpt_dim(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::DimnameList dim, bool keepdim) const { + return at::_ops::norm_names_ScalarOpt_dim::call(const_cast(*this), p, dim, keepdim); +} + +// aten::frexp.Tensor(Tensor self) -> (Tensor mantissa, Tensor exponent) +inline ::std::tuple Tensor::frexp() const { + return at::_ops::frexp_Tensor::call(const_cast(*this)); +} + +// aten::clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor +inline at::Tensor Tensor::clone(::std::optional memory_format) const { + return at::_ops::clone::call(const_cast(*this), memory_format); +} + +// aten::positive(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::positive() const { + return at::_ops::positive::call(const_cast(*this)); +} + +// aten::resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor(a!) +inline const at::Tensor & Tensor::resize_as_(const at::Tensor & the_template, ::std::optional memory_format) const { + return at::_ops::resize_as_::call(const_cast(*this), the_template, memory_format); +} + +// aten::resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!) +inline const at::Tensor & Tensor::resize_as_sparse_(const at::Tensor & the_template) const { + return at::_ops::resize_as_sparse_::call(const_cast(*this), the_template); +} + +// aten::zero_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::zero_() const { + return at::_ops::zero_::call(const_cast(*this)); +} + +// aten::sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::sub(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::sub_Tensor::call(const_cast(*this), other, alpha); +} + +// aten::sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::sub_(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::sub__Tensor::call(const_cast(*this), other, alpha); +} + +// aten::sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::sub(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::sub_Scalar::call(const_cast(*this), other, alpha); +} + +// aten::sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::sub_(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::sub__Scalar::call(const_cast(*this), other, alpha); +} + +// aten::subtract.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::subtract(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::subtract_Tensor::call(const_cast(*this), other, alpha); +} + +// aten::subtract_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::subtract_(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::subtract__Tensor::call(const_cast(*this), other, alpha); +} + +// aten::subtract.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::subtract(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::subtract_Scalar::call(const_cast(*this), other, alpha); +} + +// aten::subtract_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::subtract_(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::subtract__Scalar::call(const_cast(*this), other, alpha); +} + +// aten::heaviside(Tensor self, Tensor values) -> Tensor +inline at::Tensor Tensor::heaviside(const at::Tensor & values) const { + return at::_ops::heaviside::call(const_cast(*this), values); +} + +// aten::heaviside_(Tensor(a!) self, Tensor values) -> Tensor(a!) +inline at::Tensor & Tensor::heaviside_(const at::Tensor & values) const { + return at::_ops::heaviside_::call(const_cast(*this), values); +} + +// aten::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmm::call(const_cast(*this), mat1, mat2, beta, alpha); +} + +// aten::addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::addmm_(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmm_::call(const_cast(*this), mat1, mat2, beta, alpha); +} + +// aten::_addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor +inline at::Tensor Tensor::_addmm_activation(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, bool use_gelu) const { + return at::_ops::_addmm_activation::call(const_cast(*this), mat1, mat2, beta, alpha, use_gelu); +} + +// aten::sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!) +inline const at::Tensor & Tensor::sparse_resize_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const { + return at::_ops::sparse_resize_::call(const_cast(*this), size, sparse_dim, dense_dim); +} + +// aten::sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!) +inline const at::Tensor & Tensor::sparse_resize_and_clear_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const { + return at::_ops::sparse_resize_and_clear_::call(const_cast(*this), size, sparse_dim, dense_dim); +} + +// aten::sparse_mask(Tensor self, Tensor mask) -> Tensor +inline at::Tensor Tensor::sparse_mask(const at::Tensor & mask) const { + return at::_ops::sparse_mask::call(const_cast(*this), mask); +} + +// aten::_sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor +inline at::Tensor Tensor::_sparse_mask_projection(const at::Tensor & mask, bool accumulate_matches) const { + return at::_ops::_sparse_mask_projection::call(const_cast(*this), mask, accumulate_matches); +} + +// aten::to_dense(Tensor self, ScalarType? dtype=None, *, bool? masked_grad=None) -> Tensor +inline at::Tensor Tensor::to_dense(::std::optional dtype, ::std::optional masked_grad) const { + return at::_ops::to_dense::call(const_cast(*this), dtype, masked_grad); +} + +// aten::_to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor +inline at::Tensor Tensor::_to_dense(::std::optional dtype, ::std::optional masked_grad) const { + return at::_ops::_to_dense::call(const_cast(*this), dtype, masked_grad); +} + +// aten::sparse_dim(Tensor self) -> int +inline int64_t Tensor::sparse_dim() const { + return at::_ops::sparse_dim::call(const_cast(*this)); +} + +// aten::_dimI(Tensor self) -> int +inline int64_t Tensor::_dimI() const { + return at::_ops::_dimI::call(const_cast(*this)); +} + +// aten::dense_dim(Tensor self) -> int +inline int64_t Tensor::dense_dim() const { + return at::_ops::dense_dim::call(const_cast(*this)); +} + +// aten::_dimV(Tensor self) -> int +inline int64_t Tensor::_dimV() const { + return at::_ops::_dimV::call(const_cast(*this)); +} + +// aten::_nnz(Tensor self) -> int +inline int64_t Tensor::_nnz() const { + return at::_ops::_nnz::call(const_cast(*this)); +} + +// aten::coalesce(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::coalesce() const { + return at::_ops::coalesce::call(const_cast(*this)); +} + +// aten::is_coalesced(Tensor self) -> bool +inline bool Tensor::is_coalesced() const { + return at::_ops::is_coalesced::call(const_cast(*this)); +} + +// aten::_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_indices() const { + return at::_ops::_indices::call(const_cast(*this)); +} + +// aten::_values(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_values() const { + return at::_ops::_values::call(const_cast(*this)); +} + +// aten::_coalesced_(Tensor(a!) self, bool coalesced) -> Tensor(a!) +inline at::Tensor & Tensor::_coalesced_(bool coalesced) const { + return at::_ops::_coalesced_::call(const_cast(*this), coalesced); +} + +// aten::indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::indices() const { + return at::_ops::indices::call(const_cast(*this)); +} + +// aten::values(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::values() const { + return at::_ops::values::call(const_cast(*this)); +} + +// aten::crow_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::crow_indices() const { + return at::_ops::crow_indices::call(const_cast(*this)); +} + +// aten::col_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::col_indices() const { + return at::_ops::col_indices::call(const_cast(*this)); +} + +// aten::ccol_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::ccol_indices() const { + return at::_ops::ccol_indices::call(const_cast(*this)); +} + +// aten::row_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::row_indices() const { + return at::_ops::row_indices::call(const_cast(*this)); +} + +// aten::unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::unbind(int64_t dim) const { + return at::_ops::unbind_int::call(const_cast(*this), dim); +} + +// aten::unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[] +inline ::std::vector Tensor::unbind(at::Dimname dim) const { + return at::_ops::unbind_Dimname::call(const_cast(*this), dim); +} + +// aten::to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor +inline at::Tensor Tensor::to_sparse(int64_t sparse_dim) const { + return at::_ops::to_sparse_sparse_dim::call(const_cast(*this), sparse_dim); +} + +// aten::_to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor +inline at::Tensor Tensor::_to_sparse(int64_t sparse_dim) const { + return at::_ops::_to_sparse_sparse_dim::call(const_cast(*this), sparse_dim); +} + +// aten::to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse(::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::to_sparse::call(const_cast(*this), layout, blocksize, dense_dim); +} + +// aten::_to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse(::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::_to_sparse::call(const_cast(*this), layout, blocksize, dense_dim); +} + +// aten::to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_csr(::std::optional dense_dim) const { + return at::_ops::to_sparse_csr::call(const_cast(*this), dense_dim); +} + +// aten::_to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_csr(::std::optional dense_dim) const { + return at::_ops::_to_sparse_csr::call(const_cast(*this), dense_dim); +} + +// aten::to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_csc(::std::optional dense_dim) const { + return at::_ops::to_sparse_csc::call(const_cast(*this), dense_dim); +} + +// aten::_to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_csc(::std::optional dense_dim) const { + return at::_ops::_to_sparse_csc::call(const_cast(*this), dense_dim); +} + +// aten::to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::to_sparse_bsr::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::_to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::_to_sparse_bsr::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::to_sparse_bsc::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::_to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::_to_sparse_bsc::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::to_mkldnn(::std::optional dtype) const { + return at::_ops::to_mkldnn::call(const_cast(*this), dtype); +} + +// aten::dequantize.self(Tensor self) -> Tensor +inline at::Tensor Tensor::dequantize() const { + return at::_ops::dequantize_self::call(const_cast(*this)); +} + +// aten::q_scale(Tensor self) -> float +inline double Tensor::q_scale() const { + return at::_ops::q_scale::call(const_cast(*this)); +} + +// aten::q_zero_point(Tensor self) -> int +inline int64_t Tensor::q_zero_point() const { + return at::_ops::q_zero_point::call(const_cast(*this)); +} + +// aten::q_per_channel_scales(Tensor self) -> Tensor +inline at::Tensor Tensor::q_per_channel_scales() const { + return at::_ops::q_per_channel_scales::call(const_cast(*this)); +} + +// aten::q_per_channel_zero_points(Tensor self) -> Tensor +inline at::Tensor Tensor::q_per_channel_zero_points() const { + return at::_ops::q_per_channel_zero_points::call(const_cast(*this)); +} + +// aten::q_per_channel_axis(Tensor self) -> int +inline int64_t Tensor::q_per_channel_axis() const { + return at::_ops::q_per_channel_axis::call(const_cast(*this)); +} + +// aten::int_repr(Tensor self) -> Tensor +inline at::Tensor Tensor::int_repr() const { + return at::_ops::int_repr::call(const_cast(*this)); +} + +// aten::qscheme(Tensor self) -> QScheme +inline at::QScheme Tensor::qscheme() const { + return at::_ops::qscheme::call(const_cast(*this)); +} + +// aten::_autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a) +inline at::Tensor Tensor::_autocast_to_reduced_precision(bool cuda_enabled, bool cpu_enabled, at::ScalarType cuda_dtype, at::ScalarType cpu_dtype) const { + return at::_ops::_autocast_to_reduced_precision::call(const_cast(*this), cuda_enabled, cpu_enabled, cuda_dtype, cpu_dtype); +} + +// aten::_autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a) +inline at::Tensor Tensor::_autocast_to_full_precision(bool cuda_enabled, bool cpu_enabled) const { + return at::_ops::_autocast_to_full_precision::call(const_cast(*this), cuda_enabled, cpu_enabled); +} + +// aten::to.dtype_layout(Tensor(a) self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(at::TensorOptions options, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_dtype_layout::call(const_cast(*this), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), non_blocking, copy, c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); +} + +// aten::to.dtype_layout(Tensor(a) self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_dtype_layout::call(const_cast(*this), dtype, layout, device, pin_memory, non_blocking, copy, memory_format); +} + +// aten::to.device(Tensor(a) self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(at::Device device, at::ScalarType dtype, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_device::call(const_cast(*this), device, dtype, non_blocking, copy, memory_format); +} + +// aten::to.dtype(Tensor(a) self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(at::ScalarType dtype, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_dtype::call(const_cast(*this), dtype, non_blocking, copy, memory_format); +} + +// aten::to.other(Tensor(a) self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(const at::Tensor & other, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_other::call(const_cast(*this), other, non_blocking, copy, memory_format); +} + +// aten::item(Tensor self) -> Scalar +inline at::Scalar Tensor::item() const { + return at::_ops::item::call(const_cast(*this)); +} + +// aten::set_.source_Storage(Tensor(a!) self, Storage source) -> Tensor(a!) +inline at::Tensor & Tensor::set_(at::Storage source) const { + return at::_ops::set__source_Storage::call(const_cast(*this), source); +} + +// aten::set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set_(at::Storage source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride) const { + return at::_ops::set__source_Storage_storage_offset::call(const_cast(*this), source, storage_offset, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride)); +} + +// aten::set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set__symint(at::Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const { + return at::_ops::set__source_Storage_storage_offset::call(const_cast(*this), source, storage_offset, size, stride); +} + +// aten::set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set_(const at::Tensor & source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride) const { + return at::_ops::set__source_Tensor_storage_offset::call(const_cast(*this), source, storage_offset, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride)); +} + +// aten::set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set__symint(const at::Tensor & source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const { + return at::_ops::set__source_Tensor_storage_offset::call(const_cast(*this), source, storage_offset, size, stride); +} + +// aten::set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!) +inline at::Tensor & Tensor::set_(const at::Tensor & source) const { + return at::_ops::set__source_Tensor::call(const_cast(*this), source); +} + +// aten::set_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::set_() const { + return at::_ops::set_::call(const_cast(*this)); +} + +// aten::is_set_to(Tensor self, Tensor tensor) -> bool +inline bool Tensor::is_set_to(const at::Tensor & tensor) const { + return at::_ops::is_set_to::call(const_cast(*this), tensor); +} + +// aten::masked_fill_.Scalar(Tensor(a!) self, Tensor mask, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::masked_fill_(const at::Tensor & mask, const at::Scalar & value) const { + return at::_ops::masked_fill__Scalar::call(const_cast(*this), mask, value); +} + +// aten::masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor +inline at::Tensor Tensor::masked_fill(const at::Tensor & mask, const at::Scalar & value) const { + return at::_ops::masked_fill_Scalar::call(const_cast(*this), mask, value); +} + +// aten::masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!) +inline at::Tensor & Tensor::masked_fill_(const at::Tensor & mask, const at::Tensor & value) const { + return at::_ops::masked_fill__Tensor::call(const_cast(*this), mask, value); +} + +// aten::masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor +inline at::Tensor Tensor::masked_fill(const at::Tensor & mask, const at::Tensor & value) const { + return at::_ops::masked_fill_Tensor::call(const_cast(*this), mask, value); +} + +// aten::masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!) +inline at::Tensor & Tensor::masked_scatter_(const at::Tensor & mask, const at::Tensor & source) const { + return at::_ops::masked_scatter_::call(const_cast(*this), mask, source); +} + +// aten::masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor +inline at::Tensor Tensor::masked_scatter(const at::Tensor & mask, const at::Tensor & source) const { + return at::_ops::masked_scatter::call(const_cast(*this), mask, source); +} + +// aten::view(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::view(at::IntArrayRef size) const { + return at::_ops::view::call(const_cast(*this), c10::fromIntArrayRefSlow(size)); +} + +// aten::view(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::view_symint(c10::SymIntArrayRef size) const { + return at::_ops::view::call(const_cast(*this), size); +} + +// aten::view.dtype(Tensor(a) self, ScalarType dtype) -> Tensor(a) +inline at::Tensor Tensor::view(at::ScalarType dtype) const { + return at::_ops::view_dtype::call(const_cast(*this), dtype); +} + +// aten::put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!) +inline at::Tensor & Tensor::put_(const at::Tensor & index, const at::Tensor & source, bool accumulate) const { + return at::_ops::put_::call(const_cast(*this), index, source, accumulate); +} + +// aten::put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor +inline at::Tensor Tensor::put(const at::Tensor & index, const at::Tensor & source, bool accumulate) const { + return at::_ops::put::call(const_cast(*this), index, source, accumulate); +} + +// aten::index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::index_add_(int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha) const { + return at::_ops::index_add_::call(const_cast(*this), dim, index, source, alpha); +} + +// aten::index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::index_add(int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha) const { + return at::_ops::index_add::call(const_cast(*this), dim, index, source, alpha); +} + +// aten::index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::index_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha) const { + return at::_ops::index_add_dimname::call(const_cast(*this), dim, index, source, alpha); +} + +// aten::index_reduce_(Tensor(a!) self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor(a!) +inline at::Tensor & Tensor::index_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self) const { + return at::_ops::index_reduce_::call(const_cast(*this), dim, index, source, reduce, include_self); +} + +// aten::index_reduce(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor +inline at::Tensor Tensor::index_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self) const { + return at::_ops::index_reduce::call(const_cast(*this), dim, index, source, reduce, include_self); +} + +// aten::index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::index_fill_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill__int_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::index_fill(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill_int_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!) +inline at::Tensor & Tensor::index_fill_(int64_t dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill__int_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor +inline at::Tensor Tensor::index_fill(int64_t dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill_int_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::index_fill_(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill__Dimname_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill_.Dimname_Tensor(Tensor(a!) self, Dimname dim, Tensor index, Tensor value) -> Tensor(a!) +inline at::Tensor & Tensor::index_fill_(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill__Dimname_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.Dimname_Scalar(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::index_fill(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill_Dimname_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.Dimname_Tensor(Tensor self, Dimname dim, Tensor index, Tensor value) -> Tensor +inline at::Tensor Tensor::index_fill(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill_Dimname_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_src::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter__src::call(const_cast(*this), dim, index, src); +} + +// aten::scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::scatter_value::call(const_cast(*this), dim, index, value); +} + +// aten::scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::scatter__value::call(const_cast(*this), dim, index, value); +} + +// aten::scatter.reduce(Tensor self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce) const { + return at::_ops::scatter_reduce::call(const_cast(*this), dim, index, src, reduce); +} + +// aten::scatter_.reduce(Tensor(a!) self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce) const { + return at::_ops::scatter__reduce::call(const_cast(*this), dim, index, src, reduce); +} + +// aten::scatter.value_reduce(Tensor self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const { + return at::_ops::scatter_value_reduce::call(const_cast(*this), dim, index, value, reduce); +} + +// aten::scatter_.value_reduce(Tensor(a!) self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const { + return at::_ops::scatter__value_reduce::call(const_cast(*this), dim, index, value, reduce); +} + +// aten::scatter.dimname_src(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_dimname_src::call(const_cast(*this), dim, index, src); +} + +// aten::scatter.dimname_value(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::scatter(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::scatter_dimname_value::call(const_cast(*this), dim, index, value); +} + +// aten::scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter_add(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_add::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_add_(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_add_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_add_::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_add_dimname::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_reduce.two(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor +inline at::Tensor Tensor::scatter_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self) const { + return at::_ops::scatter_reduce_two::call(const_cast(*this), dim, index, src, reduce, include_self); +} + +// aten::scatter_reduce_.two(Tensor(a!) self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self) const { + return at::_ops::scatter_reduce__two::call(const_cast(*this), dim, index, src, reduce, include_self); +} + +// aten::eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::eq_(const at::Scalar & other) const { + return at::_ops::eq__Scalar::call(const_cast(*this), other); +} + +// aten::eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::eq_(const at::Tensor & other) const { + return at::_ops::eq__Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_and.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_and(const at::Scalar & other) const { + return at::_ops::bitwise_and_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_and(const at::Tensor & other) const { + return at::_ops::bitwise_and_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_and_(const at::Scalar & other) const { + return at::_ops::bitwise_and__Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_and_(const at::Tensor & other) const { + return at::_ops::bitwise_and__Tensor::call(const_cast(*this), other); +} + +// aten::__and__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__and__(const at::Scalar & other) const { + return at::_ops::__and___Scalar::call(const_cast(*this), other); +} + +// aten::__and__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__and__(const at::Tensor & other) const { + return at::_ops::__and___Tensor::call(const_cast(*this), other); +} + +// aten::__iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__iand__(const at::Scalar & other) const { + return at::_ops::__iand___Scalar::call(const_cast(*this), other); +} + +// aten::__iand__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::__iand__(const at::Tensor & other) const { + return at::_ops::__iand___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_or(const at::Scalar & other) const { + return at::_ops::bitwise_or_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_or(const at::Tensor & other) const { + return at::_ops::bitwise_or_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_or_(const at::Scalar & other) const { + return at::_ops::bitwise_or__Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_or_(const at::Tensor & other) const { + return at::_ops::bitwise_or__Tensor::call(const_cast(*this), other); +} + +// aten::__or__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__or__(const at::Scalar & other) const { + return at::_ops::__or___Scalar::call(const_cast(*this), other); +} + +// aten::__or__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__or__(const at::Tensor & other) const { + return at::_ops::__or___Tensor::call(const_cast(*this), other); +} + +// aten::__ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__ior__(const at::Scalar & other) const { + return at::_ops::__ior___Scalar::call(const_cast(*this), other); +} + +// aten::__ior__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::__ior__(const at::Tensor & other) const { + return at::_ops::__ior___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_xor(const at::Scalar & other) const { + return at::_ops::bitwise_xor_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_xor(const at::Tensor & other) const { + return at::_ops::bitwise_xor_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_xor_(const at::Scalar & other) const { + return at::_ops::bitwise_xor__Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_xor_(const at::Tensor & other) const { + return at::_ops::bitwise_xor__Tensor::call(const_cast(*this), other); +} + +// aten::__xor__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__xor__(const at::Scalar & other) const { + return at::_ops::__xor___Scalar::call(const_cast(*this), other); +} + +// aten::__xor__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__xor__(const at::Tensor & other) const { + return at::_ops::__xor___Tensor::call(const_cast(*this), other); +} + +// aten::__ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__ixor__(const at::Scalar & other) const { + return at::_ops::__ixor___Scalar::call(const_cast(*this), other); +} + +// aten::__ixor__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::__ixor__(const at::Tensor & other) const { + return at::_ops::__ixor___Tensor::call(const_cast(*this), other); +} + +// aten::__lshift__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__lshift__(const at::Scalar & other) const { + return at::_ops::__lshift___Scalar::call(const_cast(*this), other); +} + +// aten::__lshift__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__lshift__(const at::Tensor & other) const { + return at::_ops::__lshift___Tensor::call(const_cast(*this), other); +} + +// aten::__ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__ilshift__(const at::Scalar & other) const { + return at::_ops::__ilshift___Scalar::call(const_cast(*this), other); +} + +// aten::__ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::__ilshift__(const at::Tensor & other) const { + return at::_ops::__ilshift___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_left_shift(const at::Tensor & other) const { + return at::_ops::bitwise_left_shift_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_left_shift_(const at::Tensor & other) const { + return at::_ops::bitwise_left_shift__Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_left_shift(const at::Scalar & other) const { + return at::_ops::bitwise_left_shift_Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_left_shift_(const at::Scalar & other) const { + return at::_ops::bitwise_left_shift__Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::__rshift__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__rshift__(const at::Scalar & other) const { + return at::_ops::__rshift___Scalar::call(const_cast(*this), other); +} + +// aten::__rshift__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__rshift__(const at::Tensor & other) const { + return at::_ops::__rshift___Tensor::call(const_cast(*this), other); +} + +// aten::__irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__irshift__(const at::Scalar & other) const { + return at::_ops::__irshift___Scalar::call(const_cast(*this), other); +} + +// aten::__irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::__irshift__(const at::Tensor & other) const { + return at::_ops::__irshift___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_right_shift(const at::Tensor & other) const { + return at::_ops::bitwise_right_shift_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_right_shift_(const at::Tensor & other) const { + return at::_ops::bitwise_right_shift__Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_right_shift(const at::Scalar & other) const { + return at::_ops::bitwise_right_shift_Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_right_shift_(const at::Scalar & other) const { + return at::_ops::bitwise_right_shift__Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::tril_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::tril_(int64_t diagonal) const { + return at::_ops::tril_::call(const_cast(*this), diagonal); +} + +// aten::tril_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::tril__symint(c10::SymInt diagonal) const { + return at::_ops::tril_::call(const_cast(*this), diagonal); +} + +// aten::triu_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::triu_(int64_t diagonal) const { + return at::_ops::triu_::call(const_cast(*this), diagonal); +} + +// aten::triu_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::triu__symint(c10::SymInt diagonal) const { + return at::_ops::triu_::call(const_cast(*this), diagonal); +} + +// aten::digamma_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::digamma_() const { + return at::_ops::digamma_::call(const_cast(*this)); +} + +// aten::lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!) +inline at::Tensor & Tensor::lerp_(const at::Tensor & end, const at::Scalar & weight) const { + return at::_ops::lerp__Scalar::call(const_cast(*this), end, weight); +} + +// aten::lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!) +inline at::Tensor & Tensor::lerp_(const at::Tensor & end, const at::Tensor & weight) const { + return at::_ops::lerp__Tensor::call(const_cast(*this), end, weight); +} + +// aten::addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::addbmm_(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addbmm_::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addbmm::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::random_(int64_t from, ::std::optional to, ::std::optional generator) const { + return at::_ops::random__from::call(const_cast(*this), from, to, generator); +} + +// aten::random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::random_(int64_t to, ::std::optional generator) const { + return at::_ops::random__to::call(const_cast(*this), to, generator); +} + +// aten::random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::random_(::std::optional generator) const { + return at::_ops::random_::call(const_cast(*this), generator); +} + +// aten::uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::uniform_(double from, double to, ::std::optional generator) const { + return at::_ops::uniform_::call(const_cast(*this), from, to, generator); +} + +// aten::cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::cauchy_(double median, double sigma, ::std::optional generator) const { + return at::_ops::cauchy_::call(const_cast(*this), median, sigma, generator); +} + +// aten::log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::log_normal_(double mean, double std, ::std::optional generator) const { + return at::_ops::log_normal_::call(const_cast(*this), mean, std, generator); +} + +// aten::exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::exponential_(double lambd, ::std::optional generator) const { + return at::_ops::exponential_::call(const_cast(*this), lambd, generator); +} + +// aten::geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::geometric_(double p, ::std::optional generator) const { + return at::_ops::geometric_::call(const_cast(*this), p, generator); +} + +// aten::diag(Tensor self, int diagonal=0) -> Tensor +inline at::Tensor Tensor::diag(int64_t diagonal) const { + return at::_ops::diag::call(const_cast(*this), diagonal); +} + +// aten::cross(Tensor self, Tensor other, int? dim=None) -> Tensor +inline at::Tensor Tensor::cross(const at::Tensor & other, ::std::optional dim) const { + return at::_ops::cross::call(const_cast(*this), other, dim); +} + +// aten::triu(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::triu(int64_t diagonal) const { + return at::_ops::triu::call(const_cast(*this), diagonal); +} + +// aten::triu(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::triu_symint(c10::SymInt diagonal) const { + return at::_ops::triu::call(const_cast(*this), diagonal); +} + +// aten::tril(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::tril(int64_t diagonal) const { + return at::_ops::tril::call(const_cast(*this), diagonal); +} + +// aten::tril(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::tril_symint(c10::SymInt diagonal) const { + return at::_ops::tril::call(const_cast(*this), diagonal); +} + +// aten::trace(Tensor self) -> Tensor +inline at::Tensor Tensor::trace() const { + return at::_ops::trace::call(const_cast(*this)); +} + +// aten::ne.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::ne(const at::Scalar & other) const { + return at::_ops::ne_Scalar::call(const_cast(*this), other); +} + +// aten::ne.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::ne(const at::Tensor & other) const { + return at::_ops::ne_Tensor::call(const_cast(*this), other); +} + +// aten::ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::ne_(const at::Scalar & other) const { + return at::_ops::ne__Scalar::call(const_cast(*this), other); +} + +// aten::ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::ne_(const at::Tensor & other) const { + return at::_ops::ne__Tensor::call(const_cast(*this), other); +} + +// aten::not_equal.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::not_equal(const at::Scalar & other) const { + return at::_ops::not_equal_Scalar::call(const_cast(*this), other); +} + +// aten::not_equal.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::not_equal(const at::Tensor & other) const { + return at::_ops::not_equal_Tensor::call(const_cast(*this), other); +} + +// aten::not_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::not_equal_(const at::Scalar & other) const { + return at::_ops::not_equal__Scalar::call(const_cast(*this), other); +} + +// aten::not_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::not_equal_(const at::Tensor & other) const { + return at::_ops::not_equal__Tensor::call(const_cast(*this), other); +} + +// aten::eq.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::eq(const at::Scalar & other) const { + return at::_ops::eq_Scalar::call(const_cast(*this), other); +} + +// aten::eq.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::eq(const at::Tensor & other) const { + return at::_ops::eq_Tensor::call(const_cast(*this), other); +} + +// aten::ge.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::ge(const at::Scalar & other) const { + return at::_ops::ge_Scalar::call(const_cast(*this), other); +} + +// aten::ge.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::ge(const at::Tensor & other) const { + return at::_ops::ge_Tensor::call(const_cast(*this), other); +} + +// aten::ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::ge_(const at::Scalar & other) const { + return at::_ops::ge__Scalar::call(const_cast(*this), other); +} + +// aten::ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::ge_(const at::Tensor & other) const { + return at::_ops::ge__Tensor::call(const_cast(*this), other); +} + +// aten::greater_equal.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::greater_equal(const at::Scalar & other) const { + return at::_ops::greater_equal_Scalar::call(const_cast(*this), other); +} + +// aten::greater_equal.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::greater_equal(const at::Tensor & other) const { + return at::_ops::greater_equal_Tensor::call(const_cast(*this), other); +} + +// aten::greater_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::greater_equal_(const at::Scalar & other) const { + return at::_ops::greater_equal__Scalar::call(const_cast(*this), other); +} + +// aten::greater_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::greater_equal_(const at::Tensor & other) const { + return at::_ops::greater_equal__Tensor::call(const_cast(*this), other); +} + +// aten::le.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::le(const at::Scalar & other) const { + return at::_ops::le_Scalar::call(const_cast(*this), other); +} + +// aten::le.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::le(const at::Tensor & other) const { + return at::_ops::le_Tensor::call(const_cast(*this), other); +} + +// aten::le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::le_(const at::Scalar & other) const { + return at::_ops::le__Scalar::call(const_cast(*this), other); +} + +// aten::le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::le_(const at::Tensor & other) const { + return at::_ops::le__Tensor::call(const_cast(*this), other); +} + +// aten::less_equal.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::less_equal(const at::Scalar & other) const { + return at::_ops::less_equal_Scalar::call(const_cast(*this), other); +} + +// aten::less_equal.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::less_equal(const at::Tensor & other) const { + return at::_ops::less_equal_Tensor::call(const_cast(*this), other); +} + +// aten::less_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::less_equal_(const at::Scalar & other) const { + return at::_ops::less_equal__Scalar::call(const_cast(*this), other); +} + +// aten::less_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::less_equal_(const at::Tensor & other) const { + return at::_ops::less_equal__Tensor::call(const_cast(*this), other); +} + +// aten::gt.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::gt(const at::Scalar & other) const { + return at::_ops::gt_Scalar::call(const_cast(*this), other); +} + +// aten::gt.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::gt(const at::Tensor & other) const { + return at::_ops::gt_Tensor::call(const_cast(*this), other); +} + +// aten::gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::gt_(const at::Scalar & other) const { + return at::_ops::gt__Scalar::call(const_cast(*this), other); +} + +// aten::gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::gt_(const at::Tensor & other) const { + return at::_ops::gt__Tensor::call(const_cast(*this), other); +} + +// aten::greater.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::greater(const at::Scalar & other) const { + return at::_ops::greater_Scalar::call(const_cast(*this), other); +} + +// aten::greater.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::greater(const at::Tensor & other) const { + return at::_ops::greater_Tensor::call(const_cast(*this), other); +} + +// aten::greater_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::greater_(const at::Scalar & other) const { + return at::_ops::greater__Scalar::call(const_cast(*this), other); +} + +// aten::greater_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::greater_(const at::Tensor & other) const { + return at::_ops::greater__Tensor::call(const_cast(*this), other); +} + +// aten::lt.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::lt(const at::Scalar & other) const { + return at::_ops::lt_Scalar::call(const_cast(*this), other); +} + +// aten::lt.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::lt(const at::Tensor & other) const { + return at::_ops::lt_Tensor::call(const_cast(*this), other); +} + +// aten::lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::lt_(const at::Scalar & other) const { + return at::_ops::lt__Scalar::call(const_cast(*this), other); +} + +// aten::lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::lt_(const at::Tensor & other) const { + return at::_ops::lt__Tensor::call(const_cast(*this), other); +} + +// aten::less.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::less(const at::Scalar & other) const { + return at::_ops::less_Scalar::call(const_cast(*this), other); +} + +// aten::less.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::less(const at::Tensor & other) const { + return at::_ops::less_Tensor::call(const_cast(*this), other); +} + +// aten::less_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::less_(const at::Scalar & other) const { + return at::_ops::less__Scalar::call(const_cast(*this), other); +} + +// aten::less_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::less_(const at::Tensor & other) const { + return at::_ops::less__Tensor::call(const_cast(*this), other); +} + +// aten::take(Tensor self, Tensor index) -> Tensor +inline at::Tensor Tensor::take(const at::Tensor & index) const { + return at::_ops::take::call(const_cast(*this), index); +} + +// aten::take_along_dim(Tensor self, Tensor indices, int? dim=None) -> Tensor +inline at::Tensor Tensor::take_along_dim(const at::Tensor & indices, ::std::optional dim) const { + return at::_ops::take_along_dim::call(const_cast(*this), indices, dim); +} + +// aten::index_select(Tensor self, int dim, Tensor index) -> Tensor +inline at::Tensor Tensor::index_select(int64_t dim, const at::Tensor & index) const { + return at::_ops::index_select::call(const_cast(*this), dim, index); +} + +// aten::index_select.dimname(Tensor self, Dimname dim, Tensor index) -> Tensor +inline at::Tensor Tensor::index_select(at::Dimname dim, const at::Tensor & index) const { + return at::_ops::index_select_dimname::call(const_cast(*this), dim, index); +} + +// aten::masked_select(Tensor self, Tensor mask) -> Tensor +inline at::Tensor Tensor::masked_select(const at::Tensor & mask) const { + return at::_ops::masked_select::call(const_cast(*this), mask); +} + +// aten::nonzero(Tensor self) -> Tensor +inline at::Tensor Tensor::nonzero() const { + return at::_ops::nonzero::call(const_cast(*this)); +} + +// aten::nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor +inline at::Tensor Tensor::nonzero_static(int64_t size, int64_t fill_value) const { + return at::_ops::nonzero_static::call(const_cast(*this), size, fill_value); +} + +// aten::nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor +inline at::Tensor Tensor::nonzero_static_symint(c10::SymInt size, int64_t fill_value) const { + return at::_ops::nonzero_static::call(const_cast(*this), size, fill_value); +} + +// aten::nonzero_numpy(Tensor self) -> Tensor[] +inline ::std::vector Tensor::nonzero_numpy() const { + return at::_ops::nonzero_numpy::call(const_cast(*this)); +} + +// aten::argwhere(Tensor self) -> Tensor +inline at::Tensor Tensor::argwhere() const { + return at::_ops::argwhere::call(const_cast(*this)); +} + +// aten::gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor +inline at::Tensor Tensor::gather(int64_t dim, const at::Tensor & index, bool sparse_grad) const { + return at::_ops::gather::call(const_cast(*this), dim, index, sparse_grad); +} + +// aten::gather.dimname(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False) -> Tensor +inline at::Tensor Tensor::gather(at::Dimname dim, const at::Tensor & index, bool sparse_grad) const { + return at::_ops::gather_dimname::call(const_cast(*this), dim, index, sparse_grad); +} + +// aten::addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor +inline at::Tensor Tensor::addcmul(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcmul::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::addcmul_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!) +inline at::Tensor & Tensor::addcmul_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcmul_::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor +inline at::Tensor Tensor::addcdiv(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcdiv::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!) +inline at::Tensor & Tensor::addcdiv_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcdiv_::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient) +inline ::std::tuple Tensor::triangular_solve(const at::Tensor & A, bool upper, bool transpose, bool unitriangular) const { + return at::_ops::triangular_solve::call(const_cast(*this), A, upper, transpose, unitriangular); +} + +// aten::svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V) +inline ::std::tuple Tensor::svd(bool some, bool compute_uv) const { + return at::_ops::svd::call(const_cast(*this), some, compute_uv); +} + +// aten::swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a) +inline at::Tensor Tensor::swapaxes(int64_t axis0, int64_t axis1) const { + return at::_ops::swapaxes::call(const_cast(*this), axis0, axis1); +} + +// aten::swapaxes_(Tensor(a!) self, int axis0, int axis1) -> Tensor(a!) +inline at::Tensor & Tensor::swapaxes_(int64_t axis0, int64_t axis1) const { + return at::_ops::swapaxes_::call(const_cast(*this), axis0, axis1); +} + +// aten::swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a) +inline at::Tensor Tensor::swapdims(int64_t dim0, int64_t dim1) const { + return at::_ops::swapdims::call(const_cast(*this), dim0, dim1); +} + +// aten::swapdims_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!) +inline at::Tensor & Tensor::swapdims_(int64_t dim0, int64_t dim1) const { + return at::_ops::swapdims_::call(const_cast(*this), dim0, dim1); +} + +// aten::cholesky(Tensor self, bool upper=False) -> Tensor +inline at::Tensor Tensor::cholesky(bool upper) const { + return at::_ops::cholesky::call(const_cast(*this), upper); +} + +// aten::cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor +inline at::Tensor Tensor::cholesky_solve(const at::Tensor & input2, bool upper) const { + return at::_ops::cholesky_solve::call(const_cast(*this), input2, upper); +} + +// aten::cholesky_inverse(Tensor self, bool upper=False) -> Tensor +inline at::Tensor Tensor::cholesky_inverse(bool upper) const { + return at::_ops::cholesky_inverse::call(const_cast(*this), upper); +} + +// aten::qr(Tensor self, bool some=True) -> (Tensor Q, Tensor R) +inline ::std::tuple Tensor::qr(bool some) const { + return at::_ops::qr::call(const_cast(*this), some); +} + +// aten::geqrf(Tensor self) -> (Tensor a, Tensor tau) +inline ::std::tuple Tensor::geqrf() const { + return at::_ops::geqrf::call(const_cast(*this)); +} + +// aten::orgqr(Tensor self, Tensor input2) -> Tensor +inline at::Tensor Tensor::orgqr(const at::Tensor & input2) const { + return at::_ops::orgqr::call(const_cast(*this), input2); +} + +// aten::ormqr(Tensor self, Tensor input2, Tensor input3, bool left=True, bool transpose=False) -> Tensor +inline at::Tensor Tensor::ormqr(const at::Tensor & input2, const at::Tensor & input3, bool left, bool transpose) const { + return at::_ops::ormqr::call(const_cast(*this), input2, input3, left, transpose); +} + +// aten::lu_solve(Tensor self, Tensor LU_data, Tensor LU_pivots) -> Tensor +inline at::Tensor Tensor::lu_solve(const at::Tensor & LU_data, const at::Tensor & LU_pivots) const { + return at::_ops::lu_solve::call(const_cast(*this), LU_data, LU_pivots); +} + +// aten::multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor +inline at::Tensor Tensor::multinomial(int64_t num_samples, bool replacement, ::std::optional generator) const { + return at::_ops::multinomial::call(const_cast(*this), num_samples, replacement, generator); +} + +// aten::multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor +inline at::Tensor Tensor::multinomial_symint(c10::SymInt num_samples, bool replacement, ::std::optional generator) const { + return at::_ops::multinomial::call(const_cast(*this), num_samples, replacement, generator); +} + +// aten::lgamma_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::lgamma_() const { + return at::_ops::lgamma_::call(const_cast(*this)); +} + +// aten::lgamma(Tensor self) -> Tensor +inline at::Tensor Tensor::lgamma() const { + return at::_ops::lgamma::call(const_cast(*this)); +} + +// aten::digamma(Tensor self) -> Tensor +inline at::Tensor Tensor::digamma() const { + return at::_ops::digamma::call(const_cast(*this)); +} + +// aten::polygamma(int n, Tensor self) -> Tensor +inline at::Tensor Tensor::polygamma(int64_t n) const { + return at::_ops::polygamma::call(n, const_cast(*this)); +} + +// aten::polygamma_(Tensor(a!) self, int n) -> Tensor(a!) +inline at::Tensor & Tensor::polygamma_(int64_t n) const { + return at::_ops::polygamma_::call(const_cast(*this), n); +} + +// aten::erfinv(Tensor self) -> Tensor +inline at::Tensor Tensor::erfinv() const { + return at::_ops::erfinv::call(const_cast(*this)); +} + +// aten::erfinv_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::erfinv_() const { + return at::_ops::erfinv_::call(const_cast(*this)); +} + +// aten::i0(Tensor self) -> Tensor +inline at::Tensor Tensor::i0() const { + return at::_ops::i0::call(const_cast(*this)); +} + +// aten::i0_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::i0_() const { + return at::_ops::i0_::call(const_cast(*this)); +} + +// aten::sign(Tensor self) -> Tensor +inline at::Tensor Tensor::sign() const { + return at::_ops::sign::call(const_cast(*this)); +} + +// aten::sign_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sign_() const { + return at::_ops::sign_::call(const_cast(*this)); +} + +// aten::signbit(Tensor self) -> Tensor +inline at::Tensor Tensor::signbit() const { + return at::_ops::signbit::call(const_cast(*this)); +} + +// aten::dist(Tensor self, Tensor other, Scalar p=2) -> Tensor +inline at::Tensor Tensor::dist(const at::Tensor & other, const at::Scalar & p) const { + return at::_ops::dist::call(const_cast(*this), other, p); +} + +// aten::atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::atan2_(const at::Tensor & other) const { + return at::_ops::atan2_::call(const_cast(*this), other); +} + +// aten::atan2(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::atan2(const at::Tensor & other) const { + return at::_ops::atan2::call(const_cast(*this), other); +} + +// aten::arctan2(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::arctan2(const at::Tensor & other) const { + return at::_ops::arctan2::call(const_cast(*this), other); +} + +// aten::arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::arctan2_(const at::Tensor & other) const { + return at::_ops::arctan2_::call(const_cast(*this), other); +} + +// aten::lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor +inline at::Tensor Tensor::lerp(const at::Tensor & end, const at::Scalar & weight) const { + return at::_ops::lerp_Scalar::call(const_cast(*this), end, weight); +} + +// aten::lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor +inline at::Tensor Tensor::lerp(const at::Tensor & end, const at::Tensor & weight) const { + return at::_ops::lerp_Tensor::call(const_cast(*this), end, weight); +} + +// aten::histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor +inline at::Tensor Tensor::histc(int64_t bins, const at::Scalar & min, const at::Scalar & max) const { + return at::_ops::histc::call(const_cast(*this), bins, min, max); +} + +// aten::histogram.bins_tensor(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges) +inline ::std::tuple Tensor::histogram(const at::Tensor & bins, const ::std::optional & weight, bool density) const { + return at::_ops::histogram_bins_tensor::call(const_cast(*this), bins, weight, density); +} + +// aten::histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges) +inline ::std::tuple Tensor::histogram(int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density) const { + return at::_ops::histogram_bin_ct::call(const_cast(*this), bins, range, weight, density); +} + +// aten::fmod.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::fmod(const at::Scalar & other) const { + return at::_ops::fmod_Scalar::call(const_cast(*this), other); +} + +// aten::fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::fmod_(const at::Scalar & other) const { + return at::_ops::fmod__Scalar::call(const_cast(*this), other); +} + +// aten::fmod.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::fmod(const at::Tensor & other) const { + return at::_ops::fmod_Tensor::call(const_cast(*this), other); +} + +// aten::fmod_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::fmod_(const at::Tensor & other) const { + return at::_ops::fmod__Tensor::call(const_cast(*this), other); +} + +// aten::hypot(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::hypot(const at::Tensor & other) const { + return at::_ops::hypot::call(const_cast(*this), other); +} + +// aten::hypot_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::hypot_(const at::Tensor & other) const { + return at::_ops::hypot_::call(const_cast(*this), other); +} + +// aten::igamma(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::igamma(const at::Tensor & other) const { + return at::_ops::igamma::call(const_cast(*this), other); +} + +// aten::igamma_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::igamma_(const at::Tensor & other) const { + return at::_ops::igamma_::call(const_cast(*this), other); +} + +// aten::igammac(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::igammac(const at::Tensor & other) const { + return at::_ops::igammac::call(const_cast(*this), other); +} + +// aten::igammac_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::igammac_(const at::Tensor & other) const { + return at::_ops::igammac_::call(const_cast(*this), other); +} + +// aten::nextafter(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::nextafter(const at::Tensor & other) const { + return at::_ops::nextafter::call(const_cast(*this), other); +} + +// aten::nextafter_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::nextafter_(const at::Tensor & other) const { + return at::_ops::nextafter_::call(const_cast(*this), other); +} + +// aten::remainder.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::remainder(const at::Scalar & other) const { + return at::_ops::remainder_Scalar::call(const_cast(*this), other); +} + +// aten::remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::remainder_(const at::Scalar & other) const { + return at::_ops::remainder__Scalar::call(const_cast(*this), other); +} + +// aten::remainder.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::remainder(const at::Tensor & other) const { + return at::_ops::remainder_Tensor::call(const_cast(*this), other); +} + +// aten::remainder_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::remainder_(const at::Tensor & other) const { + return at::_ops::remainder__Tensor::call(const_cast(*this), other); +} + +// aten::min(Tensor self) -> Tensor +inline at::Tensor Tensor::min() const { + return at::_ops::min::call(const_cast(*this)); +} + +// aten::fmin(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::fmin(const at::Tensor & other) const { + return at::_ops::fmin::call(const_cast(*this), other); +} + +// aten::max(Tensor self) -> Tensor +inline at::Tensor Tensor::max() const { + return at::_ops::max::call(const_cast(*this)); +} + +// aten::fmax(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::fmax(const at::Tensor & other) const { + return at::_ops::fmax::call(const_cast(*this), other); +} + +// aten::maximum(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::maximum(const at::Tensor & other) const { + return at::_ops::maximum::call(const_cast(*this), other); +} + +// aten::max.other(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::max(const at::Tensor & other) const { + return at::_ops::max_other::call(const_cast(*this), other); +} + +// aten::minimum(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::minimum(const at::Tensor & other) const { + return at::_ops::minimum::call(const_cast(*this), other); +} + +// aten::min.other(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::min(const at::Tensor & other) const { + return at::_ops::min_other::call(const_cast(*this), other); +} + +// aten::quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::quantile(const at::Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::quantile::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::quantile(double q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::quantile_scalar::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::nanquantile(const at::Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::nanquantile::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::nanquantile(double q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::nanquantile_scalar::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(int64_t dim, bool descending) const { + return at::_ops::sort::call(const_cast(*this), dim, descending); +} + +// aten::sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(::std::optional stable, int64_t dim, bool descending) const { + return at::_ops::sort_stable::call(const_cast(*this), stable, dim, descending); +} + +// aten::sort.dimname(Tensor self, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(at::Dimname dim, bool descending) const { + return at::_ops::sort_dimname::call(const_cast(*this), dim, descending); +} + +// aten::sort.dimname_stable(Tensor self, *, bool? stable, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(::std::optional stable, at::Dimname dim, bool descending) const { + return at::_ops::sort_dimname_stable::call(const_cast(*this), stable, dim, descending); +} + +// aten::msort(Tensor self) -> Tensor +inline at::Tensor Tensor::msort() const { + return at::_ops::msort::call(const_cast(*this)); +} + +// aten::argsort(Tensor self, int dim=-1, bool descending=False) -> Tensor +inline at::Tensor Tensor::argsort(int64_t dim, bool descending) const { + return at::_ops::argsort::call(const_cast(*this), dim, descending); +} + +// aten::argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor +inline at::Tensor Tensor::argsort(bool stable, int64_t dim, bool descending) const { + return at::_ops::argsort_stable::call(const_cast(*this), stable, dim, descending); +} + +// aten::argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor +inline at::Tensor Tensor::argsort(at::Dimname dim, bool descending) const { + return at::_ops::argsort_dimname::call(const_cast(*this), dim, descending); +} + +// aten::topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::topk(int64_t k, int64_t dim, bool largest, bool sorted) const { + return at::_ops::topk::call(const_cast(*this), k, dim, largest, sorted); +} + +// aten::topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::topk_symint(c10::SymInt k, int64_t dim, bool largest, bool sorted) const { + return at::_ops::topk::call(const_cast(*this), k, dim, largest, sorted); +} + +// aten::all(Tensor self) -> Tensor +inline at::Tensor Tensor::all() const { + return at::_ops::all::call(const_cast(*this)); +} + +// aten::any(Tensor self) -> Tensor +inline at::Tensor Tensor::any() const { + return at::_ops::any::call(const_cast(*this)); +} + +// aten::renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor +inline at::Tensor Tensor::renorm(const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm) const { + return at::_ops::renorm::call(const_cast(*this), p, dim, maxnorm); +} + +// aten::renorm_(Tensor(a!) self, Scalar p, int dim, Scalar maxnorm) -> Tensor(a!) +inline at::Tensor & Tensor::renorm_(const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm) const { + return at::_ops::renorm_::call(const_cast(*this), p, dim, maxnorm); +} + +// aten::unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a) +inline at::Tensor Tensor::unfold(int64_t dimension, int64_t size, int64_t step) const { + return at::_ops::unfold::call(const_cast(*this), dimension, size, step); +} + +// aten::equal(Tensor self, Tensor other) -> bool +inline bool Tensor::equal(const at::Tensor & other) const { + return at::_ops::equal::call(const_cast(*this), other); +} + +// aten::pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor +inline at::Tensor Tensor::pow(const at::Tensor & exponent) const { + return at::_ops::pow_Tensor_Tensor::call(const_cast(*this), exponent); +} + +// aten::pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor +inline at::Tensor Tensor::pow(const at::Scalar & exponent) const { + return at::_ops::pow_Tensor_Scalar::call(const_cast(*this), exponent); +} + +// aten::pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!) +inline at::Tensor & Tensor::pow_(const at::Scalar & exponent) const { + return at::_ops::pow__Scalar::call(const_cast(*this), exponent); +} + +// aten::pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!) +inline at::Tensor & Tensor::pow_(const at::Tensor & exponent) const { + return at::_ops::pow__Tensor::call(const_cast(*this), exponent); +} + +// aten::float_power.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor +inline at::Tensor Tensor::float_power(const at::Tensor & exponent) const { + return at::_ops::float_power_Tensor_Tensor::call(const_cast(*this), exponent); +} + +// aten::float_power.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor +inline at::Tensor Tensor::float_power(const at::Scalar & exponent) const { + return at::_ops::float_power_Tensor_Scalar::call(const_cast(*this), exponent); +} + +// aten::float_power_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!) +inline at::Tensor & Tensor::float_power_(const at::Scalar & exponent) const { + return at::_ops::float_power__Scalar::call(const_cast(*this), exponent); +} + +// aten::float_power_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!) +inline at::Tensor & Tensor::float_power_(const at::Tensor & exponent) const { + return at::_ops::float_power__Tensor::call(const_cast(*this), exponent); +} + +// aten::normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::normal_(double mean, double std, ::std::optional generator) const { + return at::_ops::normal_::call(const_cast(*this), mean, std, generator); +} + +// aten::alias(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::alias() const { + return at::_ops::alias::call(const_cast(*this)); +} + +// aten::isfinite(Tensor self) -> Tensor +inline at::Tensor Tensor::isfinite() const { + return at::_ops::isfinite::call(const_cast(*this)); +} + +// aten::isinf(Tensor self) -> Tensor +inline at::Tensor Tensor::isinf() const { + return at::_ops::isinf::call(const_cast(*this)); +} + +// aten::record_stream(Tensor(a!) self, Stream s) -> () +inline void Tensor::record_stream(at::Stream s) const { + return at::_ops::record_stream::call(const_cast(*this), s); +} + +// aten::isposinf(Tensor self) -> Tensor +inline at::Tensor Tensor::isposinf() const { + return at::_ops::isposinf::call(const_cast(*this)); +} + +// aten::isneginf(Tensor self) -> Tensor +inline at::Tensor Tensor::isneginf() const { + return at::_ops::isneginf::call(const_cast(*this)); +} + +// aten::det(Tensor self) -> Tensor +inline at::Tensor Tensor::det() const { + return at::_ops::det::call(const_cast(*this)); +} + +// aten::slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet) +inline ::std::tuple Tensor::slogdet() const { + return at::_ops::slogdet::call(const_cast(*this)); +} + +// aten::logdet(Tensor self) -> Tensor +inline at::Tensor Tensor::logdet() const { + return at::_ops::logdet::call(const_cast(*this)); +} + +// aten::inverse(Tensor self) -> Tensor +inline at::Tensor Tensor::inverse() const { + return at::_ops::inverse::call(const_cast(*this)); +} + +// aten::inner(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::inner(const at::Tensor & other) const { + return at::_ops::inner::call(const_cast(*this), other); +} + +// aten::outer(Tensor self, Tensor vec2) -> Tensor +inline at::Tensor Tensor::outer(const at::Tensor & vec2) const { + return at::_ops::outer::call(const_cast(*this), vec2); +} + +// aten::ger(Tensor self, Tensor vec2) -> Tensor +inline at::Tensor Tensor::ger(const at::Tensor & vec2) const { + return at::_ops::ger::call(const_cast(*this), vec2); +} + +// aten::to_padded_tensor(Tensor self, float padding, SymInt[]? output_size=None) -> Tensor +inline at::Tensor Tensor::to_padded_tensor(double padding, at::OptionalIntArrayRef output_size) const { + return at::_ops::to_padded_tensor::call(const_cast(*this), padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt); +} + +// aten::to_padded_tensor(Tensor self, float padding, SymInt[]? output_size=None) -> Tensor +inline at::Tensor Tensor::to_padded_tensor_symint(double padding, at::OptionalSymIntArrayRef output_size) const { + return at::_ops::to_padded_tensor::call(const_cast(*this), padding, output_size); +} +} // namespace at + + +namespace c10 { +template <> +struct MaybeOwnedTraits { + using owned_type = at::Tensor; + using borrow_type = at::Tensor; + + static borrow_type createBorrow(const owned_type& from) { + // NOTE: this can be implemented without the special + // unsafe_borrow_t Tensor constructor as + // + // return borrow_type(c10::intrusive_ptr::reclaim(from.unsafeGetTensorImpl())); + // + // but that hurts inlining due to the nullptr check in the + // Tensor(c10::intrusive_ptr<...>) constructor. We already know + // that from.impl_ isn't null because from is a valid Tensor, so + // we needn't do the check again. (using __builtin_assume can + // avoid this, but wouldn't be portable to MSVC.) + return borrow_type(borrow_type::unsafe_borrow_t{}, from); + } + + static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) { + lhs.unsafeReleaseTensorImpl(); + // See above note: this can be implemented with public API + // similarly to createBorrow(), but that would hurt inlining. + lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs); + } + + static void destroyBorrow(borrow_type& toDestroy) { + toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0. + } + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return &borrow; + } + + static bool debugBorrowIsValid(const borrow_type& /*borrow*/) { + return true; + } +}; + +template <> +struct ExclusivelyOwnedTraits { + using repr_type = at::Tensor; + using pointer_type = at::Tensor*; + using const_pointer_type = const at::Tensor*; + + static repr_type nullRepr() { + return at::Tensor(); + } + + template + static repr_type createInPlace(Args&&... args) { + return at::Tensor(std::forward(args)...); + } + + static repr_type moveToRepr(at::Tensor&& x) { + return std::move(x); + } + + static void destroyOwned(at::Tensor& x) { + return ExclusivelyOwnedTraits::destroyOwned(x); + } + + static at::Tensor take(at::Tensor& x) { + return std::move(x); + } + + static pointer_type getImpl(repr_type& x) { + return &x; + } + + static const_pointer_type getImpl(const repr_type& x) { + return &x; + } +}; +} // namespace c10 + +namespace at { + +inline c10::MaybeOwned borrow_from_optional_tensor( + const std::optional& opt) { + return opt.has_value() + ? c10::MaybeOwned::borrowed(*opt) + : c10::MaybeOwned::owned(std::in_place); +} + +inline c10::MaybeOwned Tensor::expect_contiguous(MemoryFormat memory_format) const & { + if (is_contiguous(memory_format)) { + return c10::MaybeOwned::borrowed(*this); + } else { + return c10::MaybeOwned::owned(__dispatch_contiguous(memory_format)); + } +} +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TorchDispatchUtils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TorchDispatchUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..11851fa597489b30a0f22b9fab591826a081ab53 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TorchDispatchUtils.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace at::impl { + +TORCH_API bool tensor_has_dispatch(const at::Tensor& t); +TORCH_API bool tensorlist_has_dispatch(at::ITensorListRef li); +TORCH_API bool tensorlist_has_dispatch( + const c10::List>& li); +using c10::impl::dispatch_mode_enabled; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TransformationHelper.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TransformationHelper.h new file mode 100644 index 0000000000000000000000000000000000000000..5a7c420c5f7f2df051306625c741562a04b0bed9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/TransformationHelper.h @@ -0,0 +1,180 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { + +// Using DistAccumType in accumulate types for distributions. +// Note: Ideally we'd be using ATen/AccumulateType.h but looks +// like the there is some inconsistency in how accumulate types +// are mapped currently, e.g. for the cpu side, float is mapped +// to double. +template +struct DistAccumType { }; + +#if defined(__CUDACC__) || defined(__HIPCC__) +template <> struct DistAccumType { using type = float; }; +#endif +template <> struct DistAccumType { using type = float; }; +template <> struct DistAccumType { using type = float; }; +template <> struct DistAccumType { using type = float; }; +template <> struct DistAccumType { using type = double; }; + +template +using dist_acctype = typename DistAccumType::type; + +namespace transformation { + +/** + * A transformation function for `torch.Tensor.random_()`, when both `from` and `to` are specified. + * `range` is `to - from` + * `base` is `from` + */ +template +C10_HOST_DEVICE inline T uniform_int_from_to(V val, uint64_t range, int64_t base) { + return static_cast(static_cast((val % range) + base)); +} + +/** + * A transformation function for `torch.Tensor.random_()`, when `from=min_value(int64_t)` and to=None + */ +template +C10_HOST_DEVICE inline T uniform_int_full_range(V val) { + return static_cast(static_cast(val)); +} + +/** + * A transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`. + * In order to prevent compiler warnings reported in GitHub issue 46391, T can't be float or double + * in this overloaded version + */ +template +C10_HOST_DEVICE inline std::enable_if_t, T>uniform_int(V val) { + if constexpr (std::is_same_v) { + return static_cast(val & 1); + } else if constexpr (std::is_same_v) { + return static_cast(val % (static_cast(std::numeric_limits::max()) + 1)); + } else if constexpr (std::is_same_v || std::is_same_v) { + return static_cast(val % static_cast((1ULL << std::numeric_limits::digits) + 1)); + } else if constexpr (std::is_integral_v) { + return static_cast(val % (static_cast(std::numeric_limits::max()) + 1)); + } else { + assert(false); + return 0; + } +} + +/** + * An overloaded transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`, + * added to fix compiler warnings reported in GitHub issue 46391. T is either float or double in this version. + */ +template +C10_HOST_DEVICE inline std::enable_if_t, T>uniform_int(V val) { + return static_cast(val % static_cast((1ULL << std::numeric_limits::digits) + 1)); +} + +template +C10_HOST_DEVICE inline dist_acctype uniform_real(V val, T from, T to) { + constexpr auto MASK = static_cast((static_cast(1) << std::numeric_limits::digits) - 1); + constexpr auto DIVISOR = static_cast>(1) / (static_cast(1) << std::numeric_limits::digits); + dist_acctype x = (val & MASK) * DIVISOR; + return (x * (to - from) + from); +} + +/** + * Transforms normally distributed `val` with mean 0.0 and standard deviation 1.0 to + * normally distributed with `mean` and standard deviation `std`. + */ +template +C10_HOST_DEVICE inline T normal(T val, T mean, T std) { + return val * std + mean; +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * Cauchy distribution with location parameter `median` and scale parameter `sigma`. + */ +template +C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) { + // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function + // __tanf overflows and returns `inf/-inf` when (val > 1 - eps) or (val < 0 + eps), + // thus we clip those values. + constexpr T eps = std::numeric_limits::epsilon(); + constexpr T one_minus_eps = 1 - eps; + constexpr T zero_plus_eps = 0 + eps; + val = (val > one_minus_eps ? one_minus_eps : val); + val = (val < zero_plus_eps ? zero_plus_eps : val); + return median + sigma * at::tan(c10::pi * (val - static_cast(0.5))); +} + +template <> +C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) { + // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function + return median + sigma * at::tan(c10::pi * (val - 0.5)); +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * exponentially distributed with `lambda` parameter of the distribution. + */ +template +C10_HOST_DEVICE inline T exponential(T val, T lambda) { + // https://en.wikipedia.org/wiki/Exponential_distribution#Generating_exponential_variates + // Different implementations for CUDA and CPU to preserve original logic + // TODO: must be investigated and unified!!! + // https://github.com/pytorch/pytorch/issues/38662 +#if defined(__CUDACC__) || defined(__HIPCC__) + // BEFORE TOUCHING THIS CODE READ: https://github.com/pytorch/pytorch/issues/16706 + // curand_uniform has (0,1] bounds. log(1) is 0 and exponential excludes 0. + // we need log to be not 0, and not underflow when converted to half + // fast __logf approximation can underflow, so set log to -epsilon/2 for 1 or close to 1 args + auto log = val >= static_cast(1.) - std::numeric_limits::epsilon() / 2 + ? -std::numeric_limits::epsilon() / 2 + : at::log(val); + return static_cast(-1.0) / lambda * log; +#else + return static_cast(-1.0) / lambda * at::log1p(-val); +#endif +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * geometrically distributed with success probability `p`. + */ +template +C10_HOST_DEVICE inline T geometric(T val, T p) { + // https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions + return static_cast(::ceil(at::log(val) / at::log1p(-p))); +} + +/** + * Transforms normally distributed `val` to log-normally distributed. + */ +template +C10_HOST_DEVICE inline T log_normal(T val) { + // https://en.wikipedia.org/wiki/Log-normal_distribution#Mode,_median,_quantiles + return at::exp(val); +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * bernoulli distributed with success probability `p`. + */ +template +C10_HOST_DEVICE inline T bernoulli(T val, T p) { + return val < p; +} + +}} // namespace at::transformation + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..a75b3cdb4ac8475b22cf0dd9e0f1df5b40d1ae8f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h @@ -0,0 +1,6 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/UnsafeFromTH.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/UnsafeFromTH.h new file mode 100644 index 0000000000000000000000000000000000000000..b50bd4da60e1ddb90cfc671cb63b6afe53a7fb64 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/UnsafeFromTH.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace at { + +inline Tensor unsafeTensorFromTH(void * th_pointer, bool retain) { + auto tensor_impl = c10::intrusive_ptr::reclaim(static_cast(th_pointer)); + if (retain && tensor_impl.get() != UndefinedTensorImpl::singleton()) { + c10::raw::intrusive_ptr::incref(tensor_impl.get()); + } + return Tensor(std::move(tensor_impl)); +} + +inline Storage unsafeStorageFromTH(void * th_pointer, bool retain) { + if (retain && th_pointer) { + c10::raw::intrusive_ptr::incref(static_cast(th_pointer)); + } + return Storage(c10::intrusive_ptr::reclaim(static_cast(th_pointer))); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/VariableHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/VariableHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..8d323a26becf0790fb5b7f176593fac6225dee9f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/VariableHooksInterface.h @@ -0,0 +1,90 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +// A little explanation about why this file exists at all. We have +// a few methods on Tensor class which require access to reified access to +// AutogradMeta. In open source, this isn't a big deal: we just access +// torch/csrc/autograd/variable.h from aten/src/ATen/core/Tensor.cpp and +// we can put the definitions inline. This is because everything gets balled +// into a single dynamic library in the end. +// +// However, inside our Facebook internal version of our build system, we +// have a split between aten and torch/csrc. So we cannot simply just +// cross this boundary. "Now wait," you might say, "Why don't we just +// merge the libraries inside Facebook". Well, the problem is that there +// are some downstream applications which are at binary size limit, and +// incorporating all of the extra code from libtorch would push them +// over (admarket/adreview/service:adreviewservice, see also +// https://github.com/pytorch/pytorch/pull/29299) So if you want to do that, +// we have to fix all of the services like this. +// +// I didn't want to block eliminating Tensor-Variable on this work, so I +// had to introduce another dynamic dispatch to get to the variable +// implementations (which live in torch/csrc/autograd/variable.cpp, FYI). +// +// I also considered using our existing dynamic dispatch mechanism, c10 +// dispatcher, to do this. However, (1) some of the functions on Tensor +// have weird signatures that are not supported by autograd, and (2) +// see this bug https://github.com/pytorch/pytorch/issues/30102 + +namespace torch::autograd { + +struct Node; + +} // namespace torch::autograd + +namespace at::impl { + +struct TORCH_API VariableHooksInterface { + virtual ~VariableHooksInterface() = default; + virtual TensorBase tensor_data(const TensorBase&) const = 0; + virtual TensorBase variable_data(const TensorBase&) const = 0; + virtual const std::shared_ptr& grad_fn( + const TensorBase&) const = 0; + virtual unsigned _register_hook( + const TensorBase&, + std::function hook) const = 0; + virtual void remove_hook(const TensorBase&, unsigned pos) const = 0; + virtual bool is_view(const TensorBase&) const = 0; + virtual const TensorBase& base(const TensorBase&) const = 0; + virtual const std::string& name(const TensorBase&) const = 0; + virtual bool is_leaf(const TensorBase&) const = 0; + virtual int64_t output_nr(const TensorBase&) const = 0; + virtual void set_data(const TensorBase&, const TensorBase&) const = 0; + virtual TensorBase data(const TensorBase&) const = 0; + virtual int64_t _version(const TensorBase&) const = 0; + virtual void retain_grad(const TensorBase&) const = 0; + virtual bool retains_grad(const TensorBase&) const = 0; + virtual void _backward( + const Tensor&, + TensorList, + const std::optional&, + std::optional, + bool) const = 0; + virtual void requires_grad_(const TensorBase&, bool) const = 0; + virtual void basic_autograd_not_implemented_fallback( + const c10::OperatorHandle& op, + c10::DispatchKeySet dispatch_keys, + torch::jit::Stack* stack) const = 0; + virtual std::optional grad_dtype(const TensorBase&) const = 0; + virtual void set_grad_dtype(const TensorBase&, const std::optional&) const = 0; +}; + +TORCH_API void SetVariableHooks(VariableHooksInterface* hooks); +TORCH_API VariableHooksInterface* GetVariableHooks(); +TORCH_API bool HasVariableHooks(); + +struct TORCH_API VariableHooksRegisterer { + explicit VariableHooksRegisterer(VariableHooksInterface* hooks) { + SetVariableHooks(hooks); + } +}; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Variadic.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Variadic.h new file mode 100644 index 0000000000000000000000000000000000000000..9c5e4676cc4dcc407a8022edbcf1f4d3589998ea --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Variadic.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace at { + +// This class allows you to write variadic functions which +// call a (possibly overloaded) function on each argument, +// in order. This is most commonly used in autogenerated code, +// where it is convenient to have a function that can uniformly +// take arguments of different types. If your arguments +// are homogeneous consider using a std::initializer_list instead. +// +// For examples of this in use, see torch/csrc/utils/variadic.h +template +struct IterArgs { + template + inline F& apply() { + return self(); + } + + // NB: Use perfect forwarding here, otherwise we'll make value + // copies of all arguments! + template + inline F& apply(T&& arg, Args&&... args) { + self()(std::forward(arg)); + if (self().short_circuit()) { + return self(); + } else { + return apply(std::forward(args)...); + } + } + + // Here are some handy overloads which provide sensible + // defaults for container-like structures that one might + // be interested in recursing into. You can enable them + // by adding: + // + // using IterArgs::operator() + // + // to your struct. These are not enabled by default because + // you may be able to process these structures more efficiently + // than handling them one-by-one. + + template + void operator()(c10::IListRef args) { + for (const auto& arg : args) { + self()(arg); + if (self().short_circuit()) + return; + } + } + + template + void operator()(at::ArrayRef args) { + for (const auto& arg : args) { + self()(arg); + if (self().short_circuit()) + return; + } + } + + template + void operator()(const torch::List& args) { + for (const auto& arg : args) { + self()(arg); + if (self().short_circuit()) + return; + } + } + + // NB: we need to specify std::vector manually as C++ won't + // do an implicit conversion to make a template deduction go through. + template + void operator()(const std::vector& args) { + self()(at::ArrayRef{args}); + } + + constexpr bool short_circuit() const { + return false; + } + + private: + inline F& self() { + return *static_cast(this); + } +}; + +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Vitals.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Vitals.h new file mode 100644 index 0000000000000000000000000000000000000000..db051046081768c9389dd0f08a6efccb52f22405 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/Vitals.h @@ -0,0 +1,99 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +#include + +namespace at::vitals { + +TORCH_API bool torchVitalEnabled(); + +struct TORCH_API TorchVitalAttr { + // always initialized to empty + std::string value; + template + TorchVitalAttr& operator<<(const T& t) { + if (torchVitalEnabled()) { + std::stringstream ss; + ss << t; + value += ss.str(); + } + return *this; + } + + template + void write(const T& t, bool force) { + if (force || torchVitalEnabled()) { + std::stringstream ss; + ss << t; + value = ss.str(); + } + } +}; + +struct TORCH_API TorchVital { + std::string name; + std::unordered_map attrs; + + explicit TorchVital(std::string n) : name(std::move(n)) {} + TorchVital(const TorchVital&) = default; + TorchVital(TorchVital&&) = default; + TorchVital& operator=(const TorchVital&) = default; + TorchVital& operator=(TorchVital&&) = default; + TorchVital() = delete; + + TorchVitalAttr& create(const std::string& attr); + TorchVitalAttr& create(const std::string& attr, bool force); + friend std::ostream& operator<<(std::ostream& os, const TorchVital& dt); + + ~TorchVital(); +}; + +std::ostream& operator<<(std::ostream& os, TorchVital const& tv); + +// A way to access vitals by string names instead of by global reference. +// This enables access to vitals from the PythonAPI. +class TORCH_API APIVitals { + public: + bool vitals_enabled; + + // Set any vital sign that was added to the map. + bool setVital( + const std::string& vital_name, + const std::string& attr_name, + const std::string& value, + bool force = false); + std::string readVitals(); + + APIVitals(); + + // Ensure this stays a singleton + APIVitals(APIVitals const& other) = delete; + APIVitals(APIVitals&& other) = delete; + APIVitals& operator=(const APIVitals&) = delete; + APIVitals& operator=(APIVitals&&) = delete; + ~APIVitals() = default; + + private: + std::unordered_map name_map_; +}; + +extern TORCH_API APIVitals VitalsAPI; + +} // namespace at::vitals + +#define TORCH_VITAL_DECLARE(name) \ + TORCH_API at::vitals::TorchVital TorchVital_##name; + +#define TORCH_VITAL_DEFINE(name) \ + TORCH_API at::vitals::TorchVital TorchVital_##name(#name); + +#define TORCH_VITAL_BASE(name) TorchVital_##name + +#define TORCH_VITAL(name, attr) TORCH_VITAL_BASE(name).create(#attr) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/alias_info.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/alias_info.h new file mode 100644 index 0000000000000000000000000000000000000000..249263de784801697cc7cc2fe5cfe8fd43afdf92 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/alias_info.h @@ -0,0 +1,167 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { +/** + * class AliasInfo + * + * Data structure to hold aliasing information for an `Argument`. They can be + * nested to represent aliasing information on contained types. + * + * There is a `beforeSet` which describes the aliasing information before the + * operator executes, and an `afterSet` that describes aliasing info + * after execution. + */ +class AliasInfo { + public: + AliasInfo() = default; + AliasInfo(bool is_write, const std::set& before_qual_strings, const std::set& after_qual_strings) : isWrite_(is_write) { + for (const auto& s: before_qual_strings) { + beforeSets_.insert(Symbol::fromQualString(s)); + } + for (const auto& s : after_qual_strings) { + afterSets_.insert(Symbol::fromQualString(s)); + } + } + // Symbol for the set that can alias anything + static Symbol wildcardSet() { + static const Symbol wc = Symbol::fromQualString("alias::*"); + return wc; + } + + void setIsWrite(bool isWrite) { + isWrite_ = isWrite; + } + + bool isWrite() const { + return isWrite_; + } + + void addBeforeSet(Symbol aliasSet) { + beforeSets_.insert(aliasSet); + } + + void addAfterSet(Symbol aliasSet) { + afterSets_.insert(aliasSet); + } + + const std::unordered_set& beforeSets() const { + return beforeSets_; + } + + const std::unordered_set& afterSets() const { + return afterSets_; + } + + Symbol beforeSet() const { + AT_ASSERT(beforeSets_.size() == 1); + return *beforeSets_.begin(); + } + + bool isWildcardBefore() const { + return beforeSets_.count(wildcardSet()) != 0; + } + + bool isWildcardAfter() const { + return afterSets_.count(wildcardSet()) != 0; + } + + // the alias info for the contained types of the type + // e.g. if this is an annotation on List[T], `sets` refers to + // the alias sets that the list may be in + // while containedTypes()[0] refers to the sets that members of the list + // may be in + void addContainedType(AliasInfo aliasInfo) { + containedTypes_.push_back(std::move(aliasInfo)); + } + const std::vector& containedTypes() const { + return containedTypes_; + } + + private: + std::unordered_set beforeSets_; + std::unordered_set afterSets_; + std::vector containedTypes_; + bool isWrite_ = false; +}; + +inline bool operator==(const AliasInfo& lhs, const AliasInfo& rhs) { + return lhs.isWrite() == rhs.isWrite() + && lhs.beforeSets() == rhs.beforeSets() + && lhs.afterSets() == rhs.afterSets() + && lhs.containedTypes() == rhs.containedTypes(); +} + +// this does match the way things are represented in the schema +inline std::ostream& operator<<(std::ostream& out, const AliasInfo& aliasInfo) { + out << '('; + bool first = true; + for (const auto& set : aliasInfo.beforeSets()) { + if (first) { + first = false; + } else { + out << '|'; + } + out << set.toUnqualString(); + } + if (aliasInfo.isWrite()) { + out << '!'; + } + if (aliasInfo.beforeSets() != aliasInfo.afterSets()) { + out << " -> "; + first = true; + for (const auto& set : aliasInfo.afterSets()) { + if (first) { + first = false; + } else { + out << '|'; + } + out << set.toUnqualString(); + } + } + out << ')'; + return out; +} +} // namespace c10 + +namespace std { +template <> + struct hash { + size_t operator()(const c10::AliasInfo& aliasInfo) const { + auto hash = std::hash()(aliasInfo.isWrite()); + + // NOTE: for unordered_set hashes, we couldn't use hash_combine + // because hash_combine is order dependent. Instead, we choose to + // use XOR as the combining function as XOR is commutative. + size_t before_set_hash_seed = 0; + for (auto &e: aliasInfo.beforeSets()) { + auto symbol_hash = std::hash()(e); + before_set_hash_seed = before_set_hash_seed ^ symbol_hash; + } + size_t after_set_hash_seed = 0; + for (auto &e: aliasInfo.afterSets()) { + auto symbol_hash = std::hash()(e); + after_set_hash_seed = after_set_hash_seed ^ symbol_hash; + } + + hash = c10::hash_combine(hash, before_set_hash_seed); + hash = c10::hash_combine(hash, after_set_hash_seed); + for (auto &e: aliasInfo.containedTypes()) { + auto contained_type_hash = std::hash()(e); + hash = c10::hash_combine(hash, contained_type_hash); + } + return hash; + } + }; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/aten_interned_strings.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/aten_interned_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..e34758ffb70290101a01e078ed47b05248b0deea --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/aten_interned_strings.h @@ -0,0 +1,2309 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from aten_interned_strings.h + +#if defined(TORCH_ASSERT_NO_OPERATORS) || defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS) +#error This change adds a dependency on native_functions.yaml, \ + meaning the file will need to be re-compiled every time an operator \ + is changed or added. Consider if including for \ + the c10::Symbol class would be sufficient, or if your change would be \ + better placed in another file. +#endif + +// ATen symbols correspond exactly to operators defined in ATen. Every +// symbol here corresponds exactly to an ATen operation defined in +// native_functions.yaml; attributes are in one-to-one correspondence +// with their ATen name. + +#define FORALL_ATEN_BASE_SYMBOLS(_) \ +_(aten, __and__) \ +_(aten, __iand__) \ +_(aten, __ilshift__) \ +_(aten, __ior__) \ +_(aten, __irshift__) \ +_(aten, __ixor__) \ +_(aten, __lshift__) \ +_(aten, __or__) \ +_(aten, __rshift__) \ +_(aten, __xor__) \ +_(aten, _adaptive_avg_pool2d) \ +_(aten, _adaptive_avg_pool2d_backward) \ +_(aten, _adaptive_avg_pool3d) \ +_(aten, _adaptive_avg_pool3d_backward) \ +_(aten, _add_batch_dim) \ +_(aten, _add_relu) \ +_(aten, _add_relu_) \ +_(aten, _addmm_activation) \ +_(aten, _aminmax) \ +_(aten, _amp_foreach_non_finite_check_and_unscale) \ +_(aten, _amp_foreach_non_finite_check_and_unscale_) \ +_(aten, _amp_update_scale) \ +_(aten, _amp_update_scale_) \ +_(aten, _assert_async) \ +_(aten, _assert_scalar) \ +_(aten, _assert_tensor_metadata) \ +_(aten, _autocast_to_full_precision) \ +_(aten, _autocast_to_reduced_precision) \ +_(aten, _backward) \ +_(aten, _batch_norm_impl_index) \ +_(aten, _batch_norm_impl_index_backward) \ +_(aten, _batch_norm_no_update) \ +_(aten, _batch_norm_with_update) \ +_(aten, _batch_norm_with_update_functional) \ +_(aten, _cast_Byte) \ +_(aten, _cast_Char) \ +_(aten, _cast_Double) \ +_(aten, _cast_Float) \ +_(aten, _cast_Half) \ +_(aten, _cast_Int) \ +_(aten, _cast_Long) \ +_(aten, _cast_Short) \ +_(aten, _cdist_backward) \ +_(aten, _cdist_forward) \ +_(aten, _cholesky_solve_helper) \ +_(aten, _choose_qparams_per_tensor) \ +_(aten, _chunk_cat) \ +_(aten, _coalesce) \ +_(aten, _coalesced) \ +_(aten, _coalesced_) \ +_(aten, _compute_linear_combination) \ +_(aten, _conj) \ +_(aten, _conj_copy) \ +_(aten, _conj_physical) \ +_(aten, _conv_depthwise2d) \ +_(aten, _convert_indices_from_coo_to_csr) \ +_(aten, _convert_indices_from_csr_to_coo) \ +_(aten, _convert_weight_to_int4pack) \ +_(aten, _convert_weight_to_int4pack_for_cpu) \ +_(aten, _convolution) \ +_(aten, _convolution_double_backward) \ +_(aten, _convolution_mode) \ +_(aten, _copy_from) \ +_(aten, _copy_from_and_resize) \ +_(aten, _cslt_compress) \ +_(aten, _cslt_sparse_mm) \ +_(aten, _cslt_sparse_mm_search) \ +_(aten, _ctc_loss) \ +_(aten, _ctc_loss_backward) \ +_(aten, _cudnn_attention_backward) \ +_(aten, _cudnn_attention_forward) \ +_(aten, _cudnn_ctc_loss) \ +_(aten, _cudnn_init_dropout_state) \ +_(aten, _cudnn_rnn) \ +_(aten, _cudnn_rnn_backward) \ +_(aten, _cudnn_rnn_flatten_weight) \ +_(aten, _cufft_clear_plan_cache) \ +_(aten, _cufft_get_plan_cache_max_size) \ +_(aten, _cufft_get_plan_cache_size) \ +_(aten, _cufft_set_plan_cache_max_size) \ +_(aten, _cummax_helper) \ +_(aten, _cummin_helper) \ +_(aten, _debug_has_internal_overlap) \ +_(aten, _dimI) \ +_(aten, _dimV) \ +_(aten, _dim_arange) \ +_(aten, _dirichlet_grad) \ +_(aten, _dyn_quant_matmul_4bit) \ +_(aten, _dyn_quant_pack_4bit_weight) \ +_(aten, _efficient_attention_backward) \ +_(aten, _efficient_attention_forward) \ +_(aten, _efficientzerotensor) \ +_(aten, _embedding_bag) \ +_(aten, _embedding_bag_backward) \ +_(aten, _embedding_bag_dense_backward) \ +_(aten, _embedding_bag_forward_only) \ +_(aten, _embedding_bag_per_sample_weights_backward) \ +_(aten, _embedding_bag_sparse_backward) \ +_(aten, _empty_affine_quantized) \ +_(aten, _empty_per_channel_affine_quantized) \ +_(aten, _euclidean_dist) \ +_(aten, _fake_quantize_learnable_per_channel_affine) \ +_(aten, _fake_quantize_learnable_per_channel_affine_backward) \ +_(aten, _fake_quantize_learnable_per_tensor_affine) \ +_(aten, _fake_quantize_learnable_per_tensor_affine_backward) \ +_(aten, _fake_quantize_per_tensor_affine_cachemask_tensor_qparams) \ +_(aten, _fft_c2c) \ +_(aten, _fft_c2r) \ +_(aten, _fft_r2c) \ +_(aten, _fill_mem_eff_dropout_mask) \ +_(aten, _fill_mem_eff_dropout_mask_) \ +_(aten, _flash_attention_backward) \ +_(aten, _flash_attention_forward) \ +_(aten, _foobar) \ +_(aten, _foreach_abs) \ +_(aten, _foreach_abs_) \ +_(aten, _foreach_acos) \ +_(aten, _foreach_acos_) \ +_(aten, _foreach_add) \ +_(aten, _foreach_add_) \ +_(aten, _foreach_addcdiv) \ +_(aten, _foreach_addcdiv_) \ +_(aten, _foreach_addcmul) \ +_(aten, _foreach_addcmul_) \ +_(aten, _foreach_asin) \ +_(aten, _foreach_asin_) \ +_(aten, _foreach_atan) \ +_(aten, _foreach_atan_) \ +_(aten, _foreach_ceil) \ +_(aten, _foreach_ceil_) \ +_(aten, _foreach_clamp_max) \ +_(aten, _foreach_clamp_max_) \ +_(aten, _foreach_clamp_min) \ +_(aten, _foreach_clamp_min_) \ +_(aten, _foreach_copy) \ +_(aten, _foreach_copy_) \ +_(aten, _foreach_cos) \ +_(aten, _foreach_cos_) \ +_(aten, _foreach_cosh) \ +_(aten, _foreach_cosh_) \ +_(aten, _foreach_div) \ +_(aten, _foreach_div_) \ +_(aten, _foreach_erf) \ +_(aten, _foreach_erf_) \ +_(aten, _foreach_erfc) \ +_(aten, _foreach_erfc_) \ +_(aten, _foreach_exp) \ +_(aten, _foreach_exp_) \ +_(aten, _foreach_expm1) \ +_(aten, _foreach_expm1_) \ +_(aten, _foreach_floor) \ +_(aten, _foreach_floor_) \ +_(aten, _foreach_frac) \ +_(aten, _foreach_frac_) \ +_(aten, _foreach_lerp) \ +_(aten, _foreach_lerp_) \ +_(aten, _foreach_lgamma) \ +_(aten, _foreach_lgamma_) \ +_(aten, _foreach_log) \ +_(aten, _foreach_log10) \ +_(aten, _foreach_log10_) \ +_(aten, _foreach_log1p) \ +_(aten, _foreach_log1p_) \ +_(aten, _foreach_log2) \ +_(aten, _foreach_log2_) \ +_(aten, _foreach_log_) \ +_(aten, _foreach_max) \ +_(aten, _foreach_maximum) \ +_(aten, _foreach_maximum_) \ +_(aten, _foreach_minimum) \ +_(aten, _foreach_minimum_) \ +_(aten, _foreach_mul) \ +_(aten, _foreach_mul_) \ +_(aten, _foreach_neg) \ +_(aten, _foreach_neg_) \ +_(aten, _foreach_norm) \ +_(aten, _foreach_pow) \ +_(aten, _foreach_pow_) \ +_(aten, _foreach_reciprocal) \ +_(aten, _foreach_reciprocal_) \ +_(aten, _foreach_round) \ +_(aten, _foreach_round_) \ +_(aten, _foreach_rsqrt) \ +_(aten, _foreach_rsqrt_) \ +_(aten, _foreach_sigmoid) \ +_(aten, _foreach_sigmoid_) \ +_(aten, _foreach_sign) \ +_(aten, _foreach_sign_) \ +_(aten, _foreach_sin) \ +_(aten, _foreach_sin_) \ +_(aten, _foreach_sinh) \ +_(aten, _foreach_sinh_) \ +_(aten, _foreach_sqrt) \ +_(aten, _foreach_sqrt_) \ +_(aten, _foreach_sub) \ +_(aten, _foreach_sub_) \ +_(aten, _foreach_tan) \ +_(aten, _foreach_tan_) \ +_(aten, _foreach_tanh) \ +_(aten, _foreach_tanh_) \ +_(aten, _foreach_trunc) \ +_(aten, _foreach_trunc_) \ +_(aten, _foreach_zero) \ +_(aten, _foreach_zero_) \ +_(aten, _functional_assert_async) \ +_(aten, _functional_assert_scalar) \ +_(aten, _functional_sym_constrain_range) \ +_(aten, _functional_sym_constrain_range_for_size) \ +_(aten, _fused_adagrad) \ +_(aten, _fused_adagrad_) \ +_(aten, _fused_adam) \ +_(aten, _fused_adam_) \ +_(aten, _fused_adamw) \ +_(aten, _fused_adamw_) \ +_(aten, _fused_dropout) \ +_(aten, _fused_moving_avg_obs_fq_helper) \ +_(aten, _fused_moving_avg_obs_fq_helper_functional) \ +_(aten, _fused_rms_norm) \ +_(aten, _fused_rms_norm_backward) \ +_(aten, _fused_sdp_choice) \ +_(aten, _fused_sgd) \ +_(aten, _fused_sgd_) \ +_(aten, _fw_primal) \ +_(aten, _fw_primal_copy) \ +_(aten, _gather_sparse_backward) \ +_(aten, _grid_sampler_2d_cpu_fallback) \ +_(aten, _grid_sampler_2d_cpu_fallback_backward) \ +_(aten, _grouped_mm) \ +_(aten, _has_compatible_shallow_copy_type) \ +_(aten, _has_same_storage_numel) \ +_(aten, _histogramdd_bin_edges) \ +_(aten, _histogramdd_from_bin_cts) \ +_(aten, _histogramdd_from_bin_tensors) \ +_(aten, _index_put_impl) \ +_(aten, _index_put_impl_) \ +_(aten, _indices) \ +_(aten, _indices_copy) \ +_(aten, _int_mm) \ +_(aten, _is_all_true) \ +_(aten, _is_any_true) \ +_(aten, _is_zerotensor) \ +_(aten, _jagged_to_padded_dense_forward) \ +_(aten, _lazy_clone) \ +_(aten, _linalg_check_errors) \ +_(aten, _linalg_det) \ +_(aten, _linalg_eigh) \ +_(aten, _linalg_eigvals) \ +_(aten, _linalg_slogdet) \ +_(aten, _linalg_solve_ex) \ +_(aten, _linalg_svd) \ +_(aten, _local_scalar_dense) \ +_(aten, _log_softmax) \ +_(aten, _log_softmax_backward_data) \ +_(aten, _logcumsumexp) \ +_(aten, _lstm_mps) \ +_(aten, _lu_with_info) \ +_(aten, _make_dep_token) \ +_(aten, _make_dual) \ +_(aten, _make_dual_copy) \ +_(aten, _make_per_channel_quantized_tensor) \ +_(aten, _make_per_tensor_quantized_tensor) \ +_(aten, _masked_scale) \ +_(aten, _masked_softmax) \ +_(aten, _masked_softmax_backward) \ +_(aten, _mixed_dtypes_linear) \ +_(aten, _mkldnn_reshape) \ +_(aten, _mkldnn_transpose) \ +_(aten, _mkldnn_transpose_) \ +_(aten, _mps_convolution) \ +_(aten, _mps_convolution_transpose) \ +_(aten, _native_batch_norm_legit) \ +_(aten, _native_batch_norm_legit_functional) \ +_(aten, _native_batch_norm_legit_no_training) \ +_(aten, _native_multi_head_attention) \ +_(aten, _neg_view) \ +_(aten, _neg_view_copy) \ +_(aten, _nested_compute_contiguous_strides_offsets) \ +_(aten, _nested_from_padded) \ +_(aten, _nested_from_padded_and_nested_example) \ +_(aten, _nested_from_padded_tensor) \ +_(aten, _nested_get_jagged_dummy) \ +_(aten, _nested_get_lengths) \ +_(aten, _nested_get_max_seqlen) \ +_(aten, _nested_get_min_seqlen) \ +_(aten, _nested_get_offsets) \ +_(aten, _nested_get_ragged_idx) \ +_(aten, _nested_get_values) \ +_(aten, _nested_get_values_copy) \ +_(aten, _nested_select_backward) \ +_(aten, _nested_sum_backward) \ +_(aten, _nested_tensor_from_mask) \ +_(aten, _nested_tensor_from_mask_left_aligned) \ +_(aten, _nested_tensor_from_tensor_list) \ +_(aten, _nested_tensor_size) \ +_(aten, _nested_tensor_softmax_with_shape) \ +_(aten, _nested_tensor_storage_offsets) \ +_(aten, _nested_tensor_strides) \ +_(aten, _nested_view_from_buffer) \ +_(aten, _nested_view_from_buffer_copy) \ +_(aten, _nested_view_from_jagged) \ +_(aten, _nested_view_from_jagged_copy) \ +_(aten, _new_zeros_with_same_feature_meta) \ +_(aten, _nnpack_available) \ +_(aten, _nnpack_spatial_convolution) \ +_(aten, _nnz) \ +_(aten, _pack_padded_sequence) \ +_(aten, _pack_padded_sequence_backward) \ +_(aten, _pad_circular) \ +_(aten, _pad_enum) \ +_(aten, _pad_packed_sequence) \ +_(aten, _padded_dense_to_jagged_forward) \ +_(aten, _pdist_backward) \ +_(aten, _pdist_forward) \ +_(aten, _pin_memory) \ +_(aten, _prelu_kernel) \ +_(aten, _prelu_kernel_backward) \ +_(aten, _print) \ +_(aten, _propagate_xla_data) \ +_(aten, _remove_batch_dim) \ +_(aten, _reshape_alias) \ +_(aten, _reshape_alias_copy) \ +_(aten, _reshape_copy) \ +_(aten, _reshape_from_tensor) \ +_(aten, _resize_output) \ +_(aten, _resize_output_) \ +_(aten, _rowwise_prune) \ +_(aten, _safe_softmax) \ +_(aten, _sample_dirichlet) \ +_(aten, _saturate_weight_to_fp16) \ +_(aten, _scaled_dot_product_attention_math) \ +_(aten, _scaled_dot_product_attention_math_for_mps) \ +_(aten, _scaled_dot_product_cudnn_attention) \ +_(aten, _scaled_dot_product_cudnn_attention_backward) \ +_(aten, _scaled_dot_product_efficient_attention) \ +_(aten, _scaled_dot_product_efficient_attention_backward) \ +_(aten, _scaled_dot_product_flash_attention) \ +_(aten, _scaled_dot_product_flash_attention_backward) \ +_(aten, _scaled_dot_product_flash_attention_for_cpu) \ +_(aten, _scaled_dot_product_flash_attention_for_cpu_backward) \ +_(aten, _scaled_dot_product_fused_attention_overrideable) \ +_(aten, _scaled_dot_product_fused_attention_overrideable_backward) \ +_(aten, _scaled_grouped_mm) \ +_(aten, _scaled_grouped_mm_v2) \ +_(aten, _scaled_mm) \ +_(aten, _scaled_mm_v2) \ +_(aten, _segment_reduce_backward) \ +_(aten, _shape_as_tensor) \ +_(aten, _slow_conv2d_backward) \ +_(aten, _slow_conv2d_forward) \ +_(aten, _sobol_engine_draw) \ +_(aten, _sobol_engine_ff) \ +_(aten, _sobol_engine_ff_) \ +_(aten, _sobol_engine_initialize_state) \ +_(aten, _sobol_engine_initialize_state_) \ +_(aten, _sobol_engine_scramble) \ +_(aten, _sobol_engine_scramble_) \ +_(aten, _softmax) \ +_(aten, _softmax_backward_data) \ +_(aten, _sparse_addmm) \ +_(aten, _sparse_broadcast_to) \ +_(aten, _sparse_broadcast_to_copy) \ +_(aten, _sparse_bsc_tensor_unsafe) \ +_(aten, _sparse_bsr_tensor_unsafe) \ +_(aten, _sparse_compressed_tensor_unsafe) \ +_(aten, _sparse_compressed_tensor_with_dims) \ +_(aten, _sparse_coo_tensor_unsafe) \ +_(aten, _sparse_coo_tensor_with_dims) \ +_(aten, _sparse_coo_tensor_with_dims_and_tensors) \ +_(aten, _sparse_csc_tensor_unsafe) \ +_(aten, _sparse_csr_prod) \ +_(aten, _sparse_csr_sum) \ +_(aten, _sparse_csr_tensor_unsafe) \ +_(aten, _sparse_log_softmax) \ +_(aten, _sparse_log_softmax_backward_data) \ +_(aten, _sparse_mask_projection) \ +_(aten, _sparse_mm) \ +_(aten, _sparse_mm_reduce_impl) \ +_(aten, _sparse_mm_reduce_impl_backward) \ +_(aten, _sparse_semi_structured_addmm) \ +_(aten, _sparse_semi_structured_apply) \ +_(aten, _sparse_semi_structured_apply_dense) \ +_(aten, _sparse_semi_structured_linear) \ +_(aten, _sparse_semi_structured_mm) \ +_(aten, _sparse_semi_structured_tile) \ +_(aten, _sparse_softmax) \ +_(aten, _sparse_softmax_backward_data) \ +_(aten, _sparse_sparse_matmul) \ +_(aten, _sparse_sum) \ +_(aten, _sparse_sum_backward) \ +_(aten, _spdiags) \ +_(aten, _spsolve) \ +_(aten, _stack) \ +_(aten, _standard_gamma) \ +_(aten, _standard_gamma_grad) \ +_(aten, _test_ambiguous_defaults) \ +_(aten, _test_autograd_multiple_dispatch) \ +_(aten, _test_autograd_multiple_dispatch_view) \ +_(aten, _test_autograd_multiple_dispatch_view_copy) \ +_(aten, _test_check_tensor) \ +_(aten, _test_functorch_fallback) \ +_(aten, _test_optional_filled_intlist) \ +_(aten, _test_optional_floatlist) \ +_(aten, _test_optional_intlist) \ +_(aten, _test_parallel_materialize) \ +_(aten, _test_serialization_subcmul) \ +_(aten, _test_string_default) \ +_(aten, _test_warn_in_autograd) \ +_(aten, _thnn_differentiable_gru_cell_backward) \ +_(aten, _thnn_differentiable_lstm_cell_backward) \ +_(aten, _thnn_fused_gru_cell) \ +_(aten, _thnn_fused_gru_cell_backward) \ +_(aten, _thnn_fused_lstm_cell) \ +_(aten, _thnn_fused_lstm_cell_backward) \ +_(aten, _thnn_fused_lstm_cell_backward_impl) \ +_(aten, _to_copy) \ +_(aten, _to_cpu) \ +_(aten, _to_dense) \ +_(aten, _to_sparse) \ +_(aten, _to_sparse_bsc) \ +_(aten, _to_sparse_bsr) \ +_(aten, _to_sparse_csc) \ +_(aten, _to_sparse_csr) \ +_(aten, _to_sparse_semi_structured) \ +_(aten, _transform_bias_rescale_qkv) \ +_(aten, _transformer_encoder_layer_fwd) \ +_(aten, _trilinear) \ +_(aten, _triton_multi_head_attention) \ +_(aten, _triton_scaled_dot_attention) \ +_(aten, _unique) \ +_(aten, _unique2) \ +_(aten, _unpack_dual) \ +_(aten, _unsafe_index) \ +_(aten, _unsafe_index_put) \ +_(aten, _unsafe_masked_index) \ +_(aten, _unsafe_masked_index_put_accumulate) \ +_(aten, _unsafe_view) \ +_(aten, _upsample_bicubic2d_aa) \ +_(aten, _upsample_bicubic2d_aa_backward) \ +_(aten, _upsample_bilinear2d_aa) \ +_(aten, _upsample_bilinear2d_aa_backward) \ +_(aten, _upsample_nearest_exact1d) \ +_(aten, _upsample_nearest_exact1d_backward) \ +_(aten, _upsample_nearest_exact2d) \ +_(aten, _upsample_nearest_exact2d_backward) \ +_(aten, _upsample_nearest_exact3d) \ +_(aten, _upsample_nearest_exact3d_backward) \ +_(aten, _use_cudnn_ctc_loss) \ +_(aten, _use_cudnn_rnn_flatten_weight) \ +_(aten, _validate_compressed_sparse_indices) \ +_(aten, _validate_sparse_bsc_tensor_args) \ +_(aten, _validate_sparse_bsr_tensor_args) \ +_(aten, _validate_sparse_compressed_tensor_args) \ +_(aten, _validate_sparse_coo_tensor_args) \ +_(aten, _validate_sparse_csc_tensor_args) \ +_(aten, _validate_sparse_csr_tensor_args) \ +_(aten, _values) \ +_(aten, _values_copy) \ +_(aten, _version) \ +_(aten, _weight_int4pack_mm) \ +_(aten, _weight_int4pack_mm_for_cpu) \ +_(aten, _weight_int4pack_mm_with_scales_and_zeros) \ +_(aten, _weight_int8pack_mm) \ +_(aten, _weight_norm) \ +_(aten, _weight_norm_differentiable_backward) \ +_(aten, _weight_norm_interface) \ +_(aten, _weight_norm_interface_backward) \ +_(aten, _wrapped_linear_prepack) \ +_(aten, _wrapped_quantized_linear_prepacked) \ +_(aten, abs) \ +_(aten, abs_) \ +_(aten, absolute) \ +_(aten, absolute_) \ +_(aten, acos) \ +_(aten, acos_) \ +_(aten, acosh) \ +_(aten, acosh_) \ +_(aten, adaptive_avg_pool1d) \ +_(aten, adaptive_avg_pool2d) \ +_(aten, adaptive_avg_pool3d) \ +_(aten, adaptive_avg_pool3d_backward) \ +_(aten, adaptive_max_pool1d) \ +_(aten, adaptive_max_pool2d) \ +_(aten, adaptive_max_pool2d_backward) \ +_(aten, adaptive_max_pool3d) \ +_(aten, adaptive_max_pool3d_backward) \ +_(aten, add) \ +_(aten, add_) \ +_(aten, addbmm) \ +_(aten, addbmm_) \ +_(aten, addcdiv) \ +_(aten, addcdiv_) \ +_(aten, addcmul) \ +_(aten, addcmul_) \ +_(aten, addmm) \ +_(aten, addmm_) \ +_(aten, addmv) \ +_(aten, addmv_) \ +_(aten, addr) \ +_(aten, addr_) \ +_(aten, adjoint) \ +_(aten, affine_grid_generator) \ +_(aten, affine_grid_generator_backward) \ +_(aten, alias) \ +_(aten, alias_copy) \ +_(aten, align_as) \ +_(aten, align_tensors) \ +_(aten, align_to) \ +_(aten, all) \ +_(aten, allclose) \ +_(aten, alpha_dropout) \ +_(aten, alpha_dropout_) \ +_(aten, amax) \ +_(aten, amin) \ +_(aten, aminmax) \ +_(aten, angle) \ +_(aten, any) \ +_(aten, arange) \ +_(aten, arccos) \ +_(aten, arccos_) \ +_(aten, arccosh) \ +_(aten, arccosh_) \ +_(aten, arcsin) \ +_(aten, arcsin_) \ +_(aten, arcsinh) \ +_(aten, arcsinh_) \ +_(aten, arctan) \ +_(aten, arctan2) \ +_(aten, arctan2_) \ +_(aten, arctan_) \ +_(aten, arctanh) \ +_(aten, arctanh_) \ +_(aten, argmax) \ +_(aten, argmin) \ +_(aten, argsort) \ +_(aten, argwhere) \ +_(aten, as_strided) \ +_(aten, as_strided_) \ +_(aten, as_strided_copy) \ +_(aten, as_strided_scatter) \ +_(aten, asin) \ +_(aten, asin_) \ +_(aten, asinh) \ +_(aten, asinh_) \ +_(aten, atan) \ +_(aten, atan2) \ +_(aten, atan2_) \ +_(aten, atan_) \ +_(aten, atanh) \ +_(aten, atanh_) \ +_(aten, atleast_1d) \ +_(aten, atleast_2d) \ +_(aten, atleast_3d) \ +_(aten, avg_pool1d) \ +_(aten, avg_pool2d) \ +_(aten, avg_pool2d_backward) \ +_(aten, avg_pool3d) \ +_(aten, avg_pool3d_backward) \ +_(aten, baddbmm) \ +_(aten, baddbmm_) \ +_(aten, bartlett_window) \ +_(aten, batch_norm) \ +_(aten, batch_norm_backward) \ +_(aten, batch_norm_backward_elemt) \ +_(aten, batch_norm_backward_reduce) \ +_(aten, batch_norm_elemt) \ +_(aten, batch_norm_gather_stats) \ +_(aten, batch_norm_gather_stats_with_counts) \ +_(aten, batch_norm_stats) \ +_(aten, batch_norm_update_stats) \ +_(aten, bernoulli) \ +_(aten, bernoulli_) \ +_(aten, bilinear) \ +_(aten, binary_cross_entropy) \ +_(aten, binary_cross_entropy_backward) \ +_(aten, binary_cross_entropy_with_logits) \ +_(aten, bincount) \ +_(aten, binomial) \ +_(aten, bitwise_and) \ +_(aten, bitwise_and_) \ +_(aten, bitwise_left_shift) \ +_(aten, bitwise_left_shift_) \ +_(aten, bitwise_not) \ +_(aten, bitwise_not_) \ +_(aten, bitwise_or) \ +_(aten, bitwise_or_) \ +_(aten, bitwise_right_shift) \ +_(aten, bitwise_right_shift_) \ +_(aten, bitwise_xor) \ +_(aten, bitwise_xor_) \ +_(aten, blackman_window) \ +_(aten, block_diag) \ +_(aten, bmm) \ +_(aten, broadcast_tensors) \ +_(aten, broadcast_to) \ +_(aten, bucketize) \ +_(aten, can_cast) \ +_(aten, cartesian_prod) \ +_(aten, cat) \ +_(aten, cauchy) \ +_(aten, cauchy_) \ +_(aten, ccol_indices) \ +_(aten, ccol_indices_copy) \ +_(aten, cdist) \ +_(aten, ceil) \ +_(aten, ceil_) \ +_(aten, celu) \ +_(aten, celu_) \ +_(aten, chain_matmul) \ +_(aten, chalf) \ +_(aten, channel_shuffle) \ +_(aten, cholesky) \ +_(aten, cholesky_inverse) \ +_(aten, cholesky_solve) \ +_(aten, choose_qparams_optimized) \ +_(aten, chunk) \ +_(aten, clamp) \ +_(aten, clamp_) \ +_(aten, clamp_max) \ +_(aten, clamp_max_) \ +_(aten, clamp_min) \ +_(aten, clamp_min_) \ +_(aten, clip) \ +_(aten, clip_) \ +_(aten, clone) \ +_(aten, coalesce) \ +_(aten, col2im) \ +_(aten, col_indices) \ +_(aten, col_indices_copy) \ +_(aten, column_stack) \ +_(aten, combinations) \ +_(aten, complex) \ +_(aten, concat) \ +_(aten, concatenate) \ +_(aten, conj) \ +_(aten, conj_physical) \ +_(aten, conj_physical_) \ +_(aten, constant_pad_nd) \ +_(aten, contiguous) \ +_(aten, conv1d) \ +_(aten, conv2d) \ +_(aten, conv3d) \ +_(aten, conv_depthwise3d) \ +_(aten, conv_tbc) \ +_(aten, conv_tbc_backward) \ +_(aten, conv_transpose1d) \ +_(aten, conv_transpose2d) \ +_(aten, conv_transpose3d) \ +_(aten, convolution) \ +_(aten, convolution_backward) \ +_(aten, convolution_backward_overrideable) \ +_(aten, convolution_overrideable) \ +_(aten, copy) \ +_(aten, copy_) \ +_(aten, copy_sparse_to_sparse) \ +_(aten, copy_sparse_to_sparse_) \ +_(aten, copysign) \ +_(aten, copysign_) \ +_(aten, corrcoef) \ +_(aten, cos) \ +_(aten, cos_) \ +_(aten, cosh) \ +_(aten, cosh_) \ +_(aten, cosine_embedding_loss) \ +_(aten, cosine_similarity) \ +_(aten, count_nonzero) \ +_(aten, cov) \ +_(aten, cross) \ +_(aten, cross_entropy_loss) \ +_(aten, crow_indices) \ +_(aten, crow_indices_copy) \ +_(aten, ctc_loss) \ +_(aten, cudnn_affine_grid_generator) \ +_(aten, cudnn_affine_grid_generator_backward) \ +_(aten, cudnn_batch_norm) \ +_(aten, cudnn_batch_norm_backward) \ +_(aten, cudnn_convolution) \ +_(aten, cudnn_convolution_add_relu) \ +_(aten, cudnn_convolution_relu) \ +_(aten, cudnn_convolution_transpose) \ +_(aten, cudnn_grid_sampler) \ +_(aten, cudnn_grid_sampler_backward) \ +_(aten, cudnn_is_acceptable) \ +_(aten, cummax) \ +_(aten, cummaxmin_backward) \ +_(aten, cummin) \ +_(aten, cumprod) \ +_(aten, cumprod_) \ +_(aten, cumprod_backward) \ +_(aten, cumsum) \ +_(aten, cumsum_) \ +_(aten, cumulative_trapezoid) \ +_(aten, data) \ +_(aten, deg2rad) \ +_(aten, deg2rad_) \ +_(aten, dense_dim) \ +_(aten, dequantize) \ +_(aten, det) \ +_(aten, detach) \ +_(aten, detach_) \ +_(aten, detach_copy) \ +_(aten, diag) \ +_(aten, diag_embed) \ +_(aten, diagflat) \ +_(aten, diagonal) \ +_(aten, diagonal_backward) \ +_(aten, diagonal_copy) \ +_(aten, diagonal_scatter) \ +_(aten, diff) \ +_(aten, digamma) \ +_(aten, digamma_) \ +_(aten, dist) \ +_(aten, div) \ +_(aten, div_) \ +_(aten, divide) \ +_(aten, divide_) \ +_(aten, dot) \ +_(aten, dropout) \ +_(aten, dropout_) \ +_(aten, dsplit) \ +_(aten, dstack) \ +_(aten, einsum) \ +_(aten, elu) \ +_(aten, elu_) \ +_(aten, elu_backward) \ +_(aten, embedding) \ +_(aten, embedding_backward) \ +_(aten, embedding_bag) \ +_(aten, embedding_dense_backward) \ +_(aten, embedding_renorm) \ +_(aten, embedding_renorm_) \ +_(aten, embedding_sparse_backward) \ +_(aten, empty) \ +_(aten, empty_like) \ +_(aten, empty_permuted) \ +_(aten, empty_quantized) \ +_(aten, empty_strided) \ +_(aten, eq) \ +_(aten, eq_) \ +_(aten, equal) \ +_(aten, erf) \ +_(aten, erf_) \ +_(aten, erfc) \ +_(aten, erfc_) \ +_(aten, erfinv) \ +_(aten, erfinv_) \ +_(aten, exp) \ +_(aten, exp2) \ +_(aten, exp2_) \ +_(aten, exp_) \ +_(aten, expand) \ +_(aten, expand_as) \ +_(aten, expand_copy) \ +_(aten, expm1) \ +_(aten, expm1_) \ +_(aten, exponential) \ +_(aten, exponential_) \ +_(aten, eye) \ +_(aten, fake_quantize_per_channel_affine) \ +_(aten, fake_quantize_per_channel_affine_cachemask) \ +_(aten, fake_quantize_per_channel_affine_cachemask_backward) \ +_(aten, fake_quantize_per_tensor_affine) \ +_(aten, fake_quantize_per_tensor_affine_cachemask) \ +_(aten, fake_quantize_per_tensor_affine_cachemask_backward) \ +_(aten, fbgemm_linear_fp16_weight) \ +_(aten, fbgemm_linear_fp16_weight_fp32_activation) \ +_(aten, fbgemm_linear_int8_weight) \ +_(aten, fbgemm_linear_int8_weight_fp32_activation) \ +_(aten, fbgemm_linear_quantize_weight) \ +_(aten, fbgemm_pack_gemm_matrix_fp16) \ +_(aten, fbgemm_pack_quantized_matrix) \ +_(aten, feature_alpha_dropout) \ +_(aten, feature_alpha_dropout_) \ +_(aten, feature_dropout) \ +_(aten, feature_dropout_) \ +_(aten, fft_fft) \ +_(aten, fft_fft2) \ +_(aten, fft_fftfreq) \ +_(aten, fft_fftn) \ +_(aten, fft_fftshift) \ +_(aten, fft_hfft) \ +_(aten, fft_hfft2) \ +_(aten, fft_hfftn) \ +_(aten, fft_ifft) \ +_(aten, fft_ifft2) \ +_(aten, fft_ifftn) \ +_(aten, fft_ifftshift) \ +_(aten, fft_ihfft) \ +_(aten, fft_ihfft2) \ +_(aten, fft_ihfftn) \ +_(aten, fft_irfft) \ +_(aten, fft_irfft2) \ +_(aten, fft_irfftn) \ +_(aten, fft_rfft) \ +_(aten, fft_rfft2) \ +_(aten, fft_rfftfreq) \ +_(aten, fft_rfftn) \ +_(aten, fill) \ +_(aten, fill_) \ +_(aten, fill_diagonal) \ +_(aten, fill_diagonal_) \ +_(aten, fix) \ +_(aten, fix_) \ +_(aten, flatten) \ +_(aten, flatten_dense_tensors) \ +_(aten, flip) \ +_(aten, fliplr) \ +_(aten, flipud) \ +_(aten, float_power) \ +_(aten, float_power_) \ +_(aten, floor) \ +_(aten, floor_) \ +_(aten, floor_divide) \ +_(aten, floor_divide_) \ +_(aten, fmax) \ +_(aten, fmin) \ +_(aten, fmod) \ +_(aten, fmod_) \ +_(aten, frac) \ +_(aten, frac_) \ +_(aten, fractional_max_pool2d) \ +_(aten, fractional_max_pool2d_backward) \ +_(aten, fractional_max_pool3d) \ +_(aten, fractional_max_pool3d_backward) \ +_(aten, frexp) \ +_(aten, frobenius_norm) \ +_(aten, from_file) \ +_(aten, full) \ +_(aten, full_like) \ +_(aten, fused_moving_avg_obs_fake_quant) \ +_(aten, gather) \ +_(aten, gather_backward) \ +_(aten, gcd) \ +_(aten, gcd_) \ +_(aten, ge) \ +_(aten, ge_) \ +_(aten, gelu) \ +_(aten, gelu_) \ +_(aten, gelu_backward) \ +_(aten, geometric) \ +_(aten, geometric_) \ +_(aten, geqrf) \ +_(aten, ger) \ +_(aten, glu) \ +_(aten, glu_backward) \ +_(aten, glu_backward_jvp) \ +_(aten, glu_jvp) \ +_(aten, gradient) \ +_(aten, greater) \ +_(aten, greater_) \ +_(aten, greater_equal) \ +_(aten, greater_equal_) \ +_(aten, grid_sampler) \ +_(aten, grid_sampler_2d) \ +_(aten, grid_sampler_2d_backward) \ +_(aten, grid_sampler_3d) \ +_(aten, grid_sampler_3d_backward) \ +_(aten, group_norm) \ +_(aten, gru) \ +_(aten, gru_cell) \ +_(aten, gt) \ +_(aten, gt_) \ +_(aten, hamming_window) \ +_(aten, hann_window) \ +_(aten, hardshrink) \ +_(aten, hardshrink_backward) \ +_(aten, hardsigmoid) \ +_(aten, hardsigmoid_) \ +_(aten, hardsigmoid_backward) \ +_(aten, hardswish) \ +_(aten, hardswish_) \ +_(aten, hardswish_backward) \ +_(aten, hardtanh) \ +_(aten, hardtanh_) \ +_(aten, hardtanh_backward) \ +_(aten, hash_tensor) \ +_(aten, heaviside) \ +_(aten, heaviside_) \ +_(aten, hinge_embedding_loss) \ +_(aten, histc) \ +_(aten, histogram) \ +_(aten, histogramdd) \ +_(aten, hsplit) \ +_(aten, hspmm) \ +_(aten, hstack) \ +_(aten, huber_loss) \ +_(aten, huber_loss_backward) \ +_(aten, hypot) \ +_(aten, hypot_) \ +_(aten, i0) \ +_(aten, i0_) \ +_(aten, igamma) \ +_(aten, igamma_) \ +_(aten, igammac) \ +_(aten, igammac_) \ +_(aten, im2col) \ +_(aten, imag) \ +_(aten, index) \ +_(aten, index_add) \ +_(aten, index_add_) \ +_(aten, index_copy) \ +_(aten, index_copy_) \ +_(aten, index_fill) \ +_(aten, index_fill_) \ +_(aten, index_put) \ +_(aten, index_put_) \ +_(aten, index_reduce) \ +_(aten, index_reduce_) \ +_(aten, index_select) \ +_(aten, index_select_backward) \ +_(aten, indices) \ +_(aten, indices_copy) \ +_(aten, infinitely_differentiable_gelu_backward) \ +_(aten, inner) \ +_(aten, instance_norm) \ +_(aten, int_repr) \ +_(aten, inverse) \ +_(aten, is_coalesced) \ +_(aten, is_complex) \ +_(aten, is_conj) \ +_(aten, is_distributed) \ +_(aten, is_floating_point) \ +_(aten, is_inference) \ +_(aten, is_leaf) \ +_(aten, is_neg) \ +_(aten, is_nonzero) \ +_(aten, is_pinned) \ +_(aten, is_same_size) \ +_(aten, is_set_to) \ +_(aten, is_signed) \ +_(aten, is_vulkan_available) \ +_(aten, isclose) \ +_(aten, isfinite) \ +_(aten, isin) \ +_(aten, isinf) \ +_(aten, isnan) \ +_(aten, isneginf) \ +_(aten, isposinf) \ +_(aten, isreal) \ +_(aten, istft) \ +_(aten, item) \ +_(aten, kaiser_window) \ +_(aten, kl_div) \ +_(aten, kron) \ +_(aten, kthvalue) \ +_(aten, l1_loss) \ +_(aten, layer_norm) \ +_(aten, lcm) \ +_(aten, lcm_) \ +_(aten, ldexp) \ +_(aten, ldexp_) \ +_(aten, le) \ +_(aten, le_) \ +_(aten, leaky_relu) \ +_(aten, leaky_relu_) \ +_(aten, leaky_relu_backward) \ +_(aten, lerp) \ +_(aten, lerp_) \ +_(aten, less) \ +_(aten, less_) \ +_(aten, less_equal) \ +_(aten, less_equal_) \ +_(aten, lgamma) \ +_(aten, lgamma_) \ +_(aten, lift) \ +_(aten, lift_fresh) \ +_(aten, lift_fresh_copy) \ +_(aten, linalg_cholesky) \ +_(aten, linalg_cholesky_ex) \ +_(aten, linalg_cond) \ +_(aten, linalg_cross) \ +_(aten, linalg_det) \ +_(aten, linalg_diagonal) \ +_(aten, linalg_eig) \ +_(aten, linalg_eigh) \ +_(aten, linalg_eigvals) \ +_(aten, linalg_eigvalsh) \ +_(aten, linalg_householder_product) \ +_(aten, linalg_inv) \ +_(aten, linalg_inv_ex) \ +_(aten, linalg_ldl_factor) \ +_(aten, linalg_ldl_factor_ex) \ +_(aten, linalg_ldl_solve) \ +_(aten, linalg_lstsq) \ +_(aten, linalg_lu) \ +_(aten, linalg_lu_factor) \ +_(aten, linalg_lu_factor_ex) \ +_(aten, linalg_lu_solve) \ +_(aten, linalg_matmul) \ +_(aten, linalg_matrix_exp) \ +_(aten, linalg_matrix_norm) \ +_(aten, linalg_matrix_power) \ +_(aten, linalg_matrix_rank) \ +_(aten, linalg_multi_dot) \ +_(aten, linalg_norm) \ +_(aten, linalg_pinv) \ +_(aten, linalg_qr) \ +_(aten, linalg_slogdet) \ +_(aten, linalg_solve) \ +_(aten, linalg_solve_ex) \ +_(aten, linalg_solve_triangular) \ +_(aten, linalg_svd) \ +_(aten, linalg_svdvals) \ +_(aten, linalg_tensorinv) \ +_(aten, linalg_tensorsolve) \ +_(aten, linalg_vander) \ +_(aten, linalg_vecdot) \ +_(aten, linalg_vector_norm) \ +_(aten, linear) \ +_(aten, linear_backward) \ +_(aten, linspace) \ +_(aten, log) \ +_(aten, log10) \ +_(aten, log10_) \ +_(aten, log1p) \ +_(aten, log1p_) \ +_(aten, log2) \ +_(aten, log2_) \ +_(aten, log_) \ +_(aten, log_normal) \ +_(aten, log_normal_) \ +_(aten, log_sigmoid) \ +_(aten, log_sigmoid_backward) \ +_(aten, log_sigmoid_forward) \ +_(aten, log_softmax) \ +_(aten, logaddexp) \ +_(aten, logaddexp2) \ +_(aten, logcumsumexp) \ +_(aten, logdet) \ +_(aten, logical_and) \ +_(aten, logical_and_) \ +_(aten, logical_not) \ +_(aten, logical_not_) \ +_(aten, logical_or) \ +_(aten, logical_or_) \ +_(aten, logical_xor) \ +_(aten, logical_xor_) \ +_(aten, logit) \ +_(aten, logit_) \ +_(aten, logit_backward) \ +_(aten, logspace) \ +_(aten, logsumexp) \ +_(aten, lshift) \ +_(aten, lstm) \ +_(aten, lstm_cell) \ +_(aten, lstm_mps_backward) \ +_(aten, lt) \ +_(aten, lt_) \ +_(aten, lu_solve) \ +_(aten, lu_unpack) \ +_(aten, mH) \ +_(aten, mT) \ +_(aten, margin_ranking_loss) \ +_(aten, masked_fill) \ +_(aten, masked_fill_) \ +_(aten, masked_scatter) \ +_(aten, masked_scatter_) \ +_(aten, masked_scatter_backward) \ +_(aten, masked_select) \ +_(aten, masked_select_backward) \ +_(aten, matmul) \ +_(aten, matmul_backward) \ +_(aten, matrix_H) \ +_(aten, matrix_exp) \ +_(aten, matrix_exp_backward) \ +_(aten, matrix_power) \ +_(aten, max) \ +_(aten, max_pool1d) \ +_(aten, max_pool1d_with_indices) \ +_(aten, max_pool2d) \ +_(aten, max_pool2d_backward) \ +_(aten, max_pool2d_with_indices) \ +_(aten, max_pool2d_with_indices_backward) \ +_(aten, max_pool3d) \ +_(aten, max_pool3d_with_indices) \ +_(aten, max_pool3d_with_indices_backward) \ +_(aten, max_unpool2d) \ +_(aten, max_unpool3d) \ +_(aten, maximum) \ +_(aten, mean) \ +_(aten, median) \ +_(aten, meshgrid) \ +_(aten, min) \ +_(aten, minimum) \ +_(aten, miopen_batch_norm) \ +_(aten, miopen_batch_norm_backward) \ +_(aten, miopen_convolution) \ +_(aten, miopen_convolution_add_relu) \ +_(aten, miopen_convolution_relu) \ +_(aten, miopen_convolution_transpose) \ +_(aten, miopen_depthwise_convolution) \ +_(aten, miopen_rnn) \ +_(aten, miopen_rnn_backward) \ +_(aten, mish) \ +_(aten, mish_) \ +_(aten, mish_backward) \ +_(aten, mkldnn_adaptive_avg_pool2d) \ +_(aten, mkldnn_adaptive_avg_pool2d_backward) \ +_(aten, mkldnn_convolution) \ +_(aten, mkldnn_linear) \ +_(aten, mkldnn_linear_backward) \ +_(aten, mkldnn_linear_backward_input) \ +_(aten, mkldnn_linear_backward_weights) \ +_(aten, mkldnn_max_pool2d) \ +_(aten, mkldnn_max_pool2d_backward) \ +_(aten, mkldnn_max_pool3d) \ +_(aten, mkldnn_max_pool3d_backward) \ +_(aten, mkldnn_reorder_conv2d_weight) \ +_(aten, mkldnn_reorder_conv3d_weight) \ +_(aten, mkldnn_rnn_layer) \ +_(aten, mkldnn_rnn_layer_backward) \ +_(aten, mm) \ +_(aten, mode) \ +_(aten, moveaxis) \ +_(aten, movedim) \ +_(aten, mps_convolution_backward) \ +_(aten, mps_convolution_transpose_backward) \ +_(aten, mse_loss) \ +_(aten, mse_loss_backward) \ +_(aten, msort) \ +_(aten, mul) \ +_(aten, mul_) \ +_(aten, multi_margin_loss) \ +_(aten, multi_margin_loss_backward) \ +_(aten, multilabel_margin_loss) \ +_(aten, multilabel_margin_loss_backward) \ +_(aten, multilabel_margin_loss_forward) \ +_(aten, multinomial) \ +_(aten, multiply) \ +_(aten, multiply_) \ +_(aten, mv) \ +_(aten, mvlgamma) \ +_(aten, mvlgamma_) \ +_(aten, nan_to_num) \ +_(aten, nan_to_num_) \ +_(aten, nanmean) \ +_(aten, nanmedian) \ +_(aten, nanquantile) \ +_(aten, nansum) \ +_(aten, narrow) \ +_(aten, narrow_copy) \ +_(aten, native_batch_norm) \ +_(aten, native_batch_norm_backward) \ +_(aten, native_channel_shuffle) \ +_(aten, native_dropout) \ +_(aten, native_dropout_backward) \ +_(aten, native_group_norm) \ +_(aten, native_group_norm_backward) \ +_(aten, native_layer_norm) \ +_(aten, native_layer_norm_backward) \ +_(aten, native_norm) \ +_(aten, ne) \ +_(aten, ne_) \ +_(aten, neg) \ +_(aten, neg_) \ +_(aten, negative) \ +_(aten, negative_) \ +_(aten, nested_to_padded_tensor) \ +_(aten, new_empty) \ +_(aten, new_empty_strided) \ +_(aten, new_full) \ +_(aten, new_ones) \ +_(aten, new_zeros) \ +_(aten, nextafter) \ +_(aten, nextafter_) \ +_(aten, nll_loss) \ +_(aten, nll_loss2d) \ +_(aten, nll_loss2d_backward) \ +_(aten, nll_loss2d_forward) \ +_(aten, nll_loss_backward) \ +_(aten, nll_loss_forward) \ +_(aten, nll_loss_nd) \ +_(aten, nonzero) \ +_(aten, nonzero_numpy) \ +_(aten, nonzero_static) \ +_(aten, norm) \ +_(aten, norm_except_dim) \ +_(aten, normal) \ +_(aten, normal_) \ +_(aten, normal_functional) \ +_(aten, not_equal) \ +_(aten, not_equal_) \ +_(aten, nuclear_norm) \ +_(aten, numpy_T) \ +_(aten, one_hot) \ +_(aten, ones) \ +_(aten, ones_like) \ +_(aten, orgqr) \ +_(aten, ormqr) \ +_(aten, outer) \ +_(aten, output_nr) \ +_(aten, pad) \ +_(aten, pad_sequence) \ +_(aten, pairwise_distance) \ +_(aten, pdist) \ +_(aten, permute) \ +_(aten, permute_copy) \ +_(aten, pin_memory) \ +_(aten, pinverse) \ +_(aten, pixel_shuffle) \ +_(aten, pixel_unshuffle) \ +_(aten, poisson) \ +_(aten, poisson_nll_loss) \ +_(aten, polar) \ +_(aten, polygamma) \ +_(aten, polygamma_) \ +_(aten, positive) \ +_(aten, pow) \ +_(aten, pow_) \ +_(aten, prelu) \ +_(aten, prod) \ +_(aten, promote_types) \ +_(aten, put) \ +_(aten, put_) \ +_(aten, q_per_channel_axis) \ +_(aten, q_per_channel_scales) \ +_(aten, q_per_channel_zero_points) \ +_(aten, q_scale) \ +_(aten, q_zero_point) \ +_(aten, qr) \ +_(aten, qscheme) \ +_(aten, quantile) \ +_(aten, quantize_per_channel) \ +_(aten, quantize_per_tensor) \ +_(aten, quantize_per_tensor_dynamic) \ +_(aten, quantized_batch_norm) \ +_(aten, quantized_gru_cell) \ +_(aten, quantized_lstm_cell) \ +_(aten, quantized_max_pool1d) \ +_(aten, quantized_max_pool2d) \ +_(aten, quantized_max_pool3d) \ +_(aten, quantized_rnn_relu_cell) \ +_(aten, quantized_rnn_tanh_cell) \ +_(aten, rad2deg) \ +_(aten, rad2deg_) \ +_(aten, rand) \ +_(aten, rand_like) \ +_(aten, randint) \ +_(aten, randint_like) \ +_(aten, randn) \ +_(aten, randn_like) \ +_(aten, random) \ +_(aten, random_) \ +_(aten, randperm) \ +_(aten, range) \ +_(aten, ravel) \ +_(aten, real) \ +_(aten, reciprocal) \ +_(aten, reciprocal_) \ +_(aten, record_stream) \ +_(aten, refine_names) \ +_(aten, reflection_pad1d) \ +_(aten, reflection_pad1d_backward) \ +_(aten, reflection_pad2d) \ +_(aten, reflection_pad2d_backward) \ +_(aten, reflection_pad3d) \ +_(aten, reflection_pad3d_backward) \ +_(aten, relu) \ +_(aten, relu6) \ +_(aten, relu6_) \ +_(aten, relu_) \ +_(aten, remainder) \ +_(aten, remainder_) \ +_(aten, rename) \ +_(aten, rename_) \ +_(aten, renorm) \ +_(aten, renorm_) \ +_(aten, repeat) \ +_(aten, repeat_interleave) \ +_(aten, replication_pad1d) \ +_(aten, replication_pad1d_backward) \ +_(aten, replication_pad2d) \ +_(aten, replication_pad2d_backward) \ +_(aten, replication_pad3d) \ +_(aten, replication_pad3d_backward) \ +_(aten, requires_grad) \ +_(aten, requires_grad_) \ +_(aten, reshape) \ +_(aten, reshape_as) \ +_(aten, resize) \ +_(aten, resize_) \ +_(aten, resize_as) \ +_(aten, resize_as_) \ +_(aten, resize_as_sparse) \ +_(aten, resize_as_sparse_) \ +_(aten, resolve_conj) \ +_(aten, resolve_neg) \ +_(aten, result_type) \ +_(aten, retain_grad) \ +_(aten, retains_grad) \ +_(aten, rms_norm) \ +_(aten, rnn_relu) \ +_(aten, rnn_relu_cell) \ +_(aten, rnn_tanh) \ +_(aten, rnn_tanh_cell) \ +_(aten, roll) \ +_(aten, rot90) \ +_(aten, round) \ +_(aten, round_) \ +_(aten, row_indices) \ +_(aten, row_indices_copy) \ +_(aten, row_stack) \ +_(aten, rrelu) \ +_(aten, rrelu_) \ +_(aten, rrelu_with_noise) \ +_(aten, rrelu_with_noise_) \ +_(aten, rrelu_with_noise_backward) \ +_(aten, rrelu_with_noise_functional) \ +_(aten, rshift) \ +_(aten, rsqrt) \ +_(aten, rsqrt_) \ +_(aten, rsub) \ +_(aten, scalar_tensor) \ +_(aten, scaled_dot_product_attention) \ +_(aten, scatter) \ +_(aten, scatter_) \ +_(aten, scatter_add) \ +_(aten, scatter_add_) \ +_(aten, scatter_reduce) \ +_(aten, scatter_reduce_) \ +_(aten, searchsorted) \ +_(aten, segment_reduce) \ +_(aten, select) \ +_(aten, select_backward) \ +_(aten, select_copy) \ +_(aten, select_scatter) \ +_(aten, selu) \ +_(aten, selu_) \ +_(aten, set) \ +_(aten, set_) \ +_(aten, set_data) \ +_(aten, sgn) \ +_(aten, sgn_) \ +_(aten, sigmoid) \ +_(aten, sigmoid_) \ +_(aten, sigmoid_backward) \ +_(aten, sign) \ +_(aten, sign_) \ +_(aten, signbit) \ +_(aten, silu) \ +_(aten, silu_) \ +_(aten, silu_backward) \ +_(aten, sin) \ +_(aten, sin_) \ +_(aten, sinc) \ +_(aten, sinc_) \ +_(aten, sinh) \ +_(aten, sinh_) \ +_(aten, size) \ +_(aten, slice) \ +_(aten, slice_backward) \ +_(aten, slice_copy) \ +_(aten, slice_inverse) \ +_(aten, slice_scatter) \ +_(aten, slogdet) \ +_(aten, slow_conv3d) \ +_(aten, slow_conv3d_forward) \ +_(aten, slow_conv_dilated2d) \ +_(aten, slow_conv_dilated3d) \ +_(aten, slow_conv_transpose2d) \ +_(aten, slow_conv_transpose3d) \ +_(aten, smm) \ +_(aten, smooth_l1_loss) \ +_(aten, smooth_l1_loss_backward) \ +_(aten, soft_margin_loss) \ +_(aten, soft_margin_loss_backward) \ +_(aten, softmax) \ +_(aten, softplus) \ +_(aten, softplus_backward) \ +_(aten, softshrink) \ +_(aten, softshrink_backward) \ +_(aten, sort) \ +_(aten, sparse_bsc_tensor) \ +_(aten, sparse_bsr_tensor) \ +_(aten, sparse_compressed_tensor) \ +_(aten, sparse_coo_tensor) \ +_(aten, sparse_csc_tensor) \ +_(aten, sparse_csr_tensor) \ +_(aten, sparse_dim) \ +_(aten, sparse_mask) \ +_(aten, sparse_resize) \ +_(aten, sparse_resize_) \ +_(aten, sparse_resize_and_clear) \ +_(aten, sparse_resize_and_clear_) \ +_(aten, sparse_sampled_addmm) \ +_(aten, special_airy_ai) \ +_(aten, special_bessel_j0) \ +_(aten, special_bessel_j1) \ +_(aten, special_bessel_y0) \ +_(aten, special_bessel_y1) \ +_(aten, special_chebyshev_polynomial_t) \ +_(aten, special_chebyshev_polynomial_u) \ +_(aten, special_chebyshev_polynomial_v) \ +_(aten, special_chebyshev_polynomial_w) \ +_(aten, special_digamma) \ +_(aten, special_entr) \ +_(aten, special_erf) \ +_(aten, special_erfc) \ +_(aten, special_erfcx) \ +_(aten, special_erfinv) \ +_(aten, special_exp2) \ +_(aten, special_expit) \ +_(aten, special_expm1) \ +_(aten, special_gammainc) \ +_(aten, special_gammaincc) \ +_(aten, special_gammaln) \ +_(aten, special_hermite_polynomial_h) \ +_(aten, special_hermite_polynomial_he) \ +_(aten, special_i0) \ +_(aten, special_i0e) \ +_(aten, special_i1) \ +_(aten, special_i1e) \ +_(aten, special_laguerre_polynomial_l) \ +_(aten, special_legendre_polynomial_p) \ +_(aten, special_log1p) \ +_(aten, special_log_ndtr) \ +_(aten, special_log_softmax) \ +_(aten, special_logit) \ +_(aten, special_logsumexp) \ +_(aten, special_modified_bessel_i0) \ +_(aten, special_modified_bessel_i1) \ +_(aten, special_modified_bessel_k0) \ +_(aten, special_modified_bessel_k1) \ +_(aten, special_multigammaln) \ +_(aten, special_ndtr) \ +_(aten, special_ndtri) \ +_(aten, special_polygamma) \ +_(aten, special_psi) \ +_(aten, special_round) \ +_(aten, special_scaled_modified_bessel_k0) \ +_(aten, special_scaled_modified_bessel_k1) \ +_(aten, special_shifted_chebyshev_polynomial_t) \ +_(aten, special_shifted_chebyshev_polynomial_u) \ +_(aten, special_shifted_chebyshev_polynomial_v) \ +_(aten, special_shifted_chebyshev_polynomial_w) \ +_(aten, special_sinc) \ +_(aten, special_softmax) \ +_(aten, special_spherical_bessel_j0) \ +_(aten, special_xlog1py) \ +_(aten, special_xlogy) \ +_(aten, special_zeta) \ +_(aten, split) \ +_(aten, split_copy) \ +_(aten, split_with_sizes) \ +_(aten, split_with_sizes_copy) \ +_(aten, sqrt) \ +_(aten, sqrt_) \ +_(aten, square) \ +_(aten, square_) \ +_(aten, squeeze) \ +_(aten, squeeze_) \ +_(aten, squeeze_copy) \ +_(aten, sspaddmm) \ +_(aten, stack) \ +_(aten, std) \ +_(aten, std_mean) \ +_(aten, stft) \ +_(aten, stride) \ +_(aten, sub) \ +_(aten, sub_) \ +_(aten, subtract) \ +_(aten, subtract_) \ +_(aten, sum) \ +_(aten, sum_to_size) \ +_(aten, svd) \ +_(aten, swapaxes) \ +_(aten, swapaxes_) \ +_(aten, swapdims) \ +_(aten, swapdims_) \ +_(aten, sym_constrain_range) \ +_(aten, sym_constrain_range_for_size) \ +_(aten, sym_is_contiguous) \ +_(aten, sym_numel) \ +_(aten, sym_size) \ +_(aten, sym_storage_offset) \ +_(aten, sym_stride) \ +_(aten, t) \ +_(aten, t_) \ +_(aten, t_copy) \ +_(aten, take) \ +_(aten, take_along_dim) \ +_(aten, tan) \ +_(aten, tan_) \ +_(aten, tanh) \ +_(aten, tanh_) \ +_(aten, tanh_backward) \ +_(aten, tensor_split) \ +_(aten, tensordot) \ +_(aten, thnn_conv2d) \ +_(aten, threshold) \ +_(aten, threshold_) \ +_(aten, threshold_backward) \ +_(aten, tile) \ +_(aten, to) \ +_(aten, to_dense) \ +_(aten, to_dense_backward) \ +_(aten, to_mkldnn) \ +_(aten, to_mkldnn_backward) \ +_(aten, to_padded_tensor) \ +_(aten, to_sparse) \ +_(aten, to_sparse_bsc) \ +_(aten, to_sparse_bsr) \ +_(aten, to_sparse_csc) \ +_(aten, to_sparse_csr) \ +_(aten, topk) \ +_(aten, trace) \ +_(aten, trace_backward) \ +_(aten, transpose) \ +_(aten, transpose_) \ +_(aten, transpose_copy) \ +_(aten, trapezoid) \ +_(aten, trapz) \ +_(aten, triangular_solve) \ +_(aten, tril) \ +_(aten, tril_) \ +_(aten, tril_indices) \ +_(aten, triplet_margin_loss) \ +_(aten, triu) \ +_(aten, triu_) \ +_(aten, triu_indices) \ +_(aten, true_divide) \ +_(aten, true_divide_) \ +_(aten, trunc) \ +_(aten, trunc_) \ +_(aten, type_as) \ +_(aten, unbind) \ +_(aten, unbind_copy) \ +_(aten, unflatten) \ +_(aten, unflatten_dense_tensors) \ +_(aten, unfold) \ +_(aten, unfold_backward) \ +_(aten, unfold_copy) \ +_(aten, uniform) \ +_(aten, uniform_) \ +_(aten, unique_consecutive) \ +_(aten, unique_dim) \ +_(aten, unique_dim_consecutive) \ +_(aten, unsafe_chunk) \ +_(aten, unsafe_split) \ +_(aten, unsafe_split_with_sizes) \ +_(aten, unsqueeze) \ +_(aten, unsqueeze_) \ +_(aten, unsqueeze_copy) \ +_(aten, upsample_bicubic2d) \ +_(aten, upsample_bicubic2d_backward) \ +_(aten, upsample_bilinear2d) \ +_(aten, upsample_bilinear2d_backward) \ +_(aten, upsample_linear1d) \ +_(aten, upsample_linear1d_backward) \ +_(aten, upsample_nearest1d) \ +_(aten, upsample_nearest1d_backward) \ +_(aten, upsample_nearest2d) \ +_(aten, upsample_nearest2d_backward) \ +_(aten, upsample_nearest3d) \ +_(aten, upsample_nearest3d_backward) \ +_(aten, upsample_trilinear3d) \ +_(aten, upsample_trilinear3d_backward) \ +_(aten, value_selecting_reduction_backward) \ +_(aten, values) \ +_(aten, values_copy) \ +_(aten, vander) \ +_(aten, var) \ +_(aten, var_mean) \ +_(aten, vdot) \ +_(aten, view) \ +_(aten, view_as) \ +_(aten, view_as_complex) \ +_(aten, view_as_complex_copy) \ +_(aten, view_as_real) \ +_(aten, view_as_real_copy) \ +_(aten, view_copy) \ +_(aten, vsplit) \ +_(aten, vstack) \ +_(aten, where) \ +_(aten, xlogy) \ +_(aten, xlogy_) \ +_(aten, zero) \ +_(aten, zero_) \ +_(aten, zeros) \ +_(aten, zeros_like) + +#define FORALL_ATTR_BASE_SYMBOLS(_) \ +_(attr, A) \ +_(attr, B) \ +_(attr, C) \ +_(attr, H) \ +_(attr, HxW) \ +_(attr, K) \ +_(attr, L) \ +_(attr, LD) \ +_(attr, LU) \ +_(attr, LU_data) \ +_(attr, LU_pivots) \ +_(attr, M) \ +_(attr, N) \ +_(attr, P) \ +_(attr, Q) \ +_(attr, R) \ +_(attr, S) \ +_(attr, U) \ +_(attr, UPLO) \ +_(attr, V) \ +_(attr, Vh) \ +_(attr, W) \ +_(attr, X) \ +_(attr, a) \ +_(attr, abs) \ +_(attr, accumulate) \ +_(attr, accumulate_matches) \ +_(attr, activation) \ +_(attr, addends) \ +_(attr, adjoint) \ +_(attr, alg_id) \ +_(attr, algorithm) \ +_(attr, alibi_slopes) \ +_(attr, align_corners) \ +_(attr, align_to_window) \ +_(attr, allow_tf32) \ +_(attr, alpha) \ +_(attr, amsgrad) \ +_(attr, anchor) \ +_(attr, angle) \ +_(attr, any) \ +_(attr, api_name) \ +_(attr, append) \ +_(attr, approximate) \ +_(attr, arg1) \ +_(attr, arg2) \ +_(attr, arg3) \ +_(attr, arg_out) \ +_(attr, assert_msg) \ +_(attr, assume_unique) \ +_(attr, atol) \ +_(attr, attn_bias) \ +_(attr, attn_mask) \ +_(attr, average_attn_weights) \ +_(attr, averaging_const) \ +_(attr, aweights) \ +_(attr, axis) \ +_(attr, axis0) \ +_(attr, axis1) \ +_(attr, b) \ +_(attr, b_hh) \ +_(attr, b_ih) \ +_(attr, bag_size) \ +_(attr, base) \ +_(attr, batch1) \ +_(attr, batch2) \ +_(attr, batch_dim) \ +_(attr, batch_first) \ +_(attr, batch_size) \ +_(attr, batch_sizes) \ +_(attr, benchmark) \ +_(attr, beta) \ +_(attr, beta1) \ +_(attr, beta2) \ +_(attr, bias) \ +_(attr, bias_defined) \ +_(attr, bias_g) \ +_(attr, bias_requires_grad) \ +_(attr, bias_sizes) \ +_(attr, bidirectional) \ +_(attr, bin_edges) \ +_(attr, bins) \ +_(attr, bit_width) \ +_(attr, blank) \ +_(attr, block_size) \ +_(attr, blocksize) \ +_(attr, boundaries) \ +_(attr, buffer) \ +_(attr, ccol_indices) \ +_(attr, cdim) \ +_(attr, cdist) \ +_(attr, ceil_mode) \ +_(attr, cell_state_fwd) \ +_(attr, center) \ +_(attr, ch_axis) \ +_(attr, check_errors) \ +_(attr, check_pinning) \ +_(attr, chunks) \ +_(attr, coalesced) \ +_(attr, coefficients) \ +_(attr, col) \ +_(attr, col_indices) \ +_(attr, col_offsets) \ +_(attr, col_offsets_hh) \ +_(attr, col_offsets_ih) \ +_(attr, compressed_A) \ +_(attr, compressed_idx) \ +_(attr, compressed_indices) \ +_(attr, compressed_indices_dtype) \ +_(attr, compute_log_sumexp) \ +_(attr, compute_mode) \ +_(attr, compute_uv) \ +_(attr, compute_v) \ +_(attr, condition) \ +_(attr, contraction_dim) \ +_(attr, copy) \ +_(attr, correction) \ +_(attr, count) \ +_(attr, count_include_pad) \ +_(attr, counts) \ +_(attr, cpu_dtype) \ +_(attr, cpu_enabled) \ +_(attr, cpu_nested_shape_example) \ +_(attr, create_graph) \ +_(attr, crow_indices) \ +_(attr, cu_seqlens_k) \ +_(attr, cu_seqlens_q) \ +_(attr, cuda_dtype) \ +_(attr, cuda_enabled) \ +_(attr, cudnn_enable) \ +_(attr, cudnn_enabled) \ +_(attr, cum_seq_k) \ +_(attr, cum_seq_q) \ +_(attr, custom_mask_type) \ +_(attr, cx) \ +_(attr, cx_) \ +_(attr, cx_tmp) \ +_(attr, cy) \ +_(attr, cy_) \ +_(attr, d) \ +_(attr, dampening) \ +_(attr, data) \ +_(attr, decimals) \ +_(attr, delta) \ +_(attr, dense) \ +_(attr, dense_B) \ +_(attr, dense_dim) \ +_(attr, density) \ +_(attr, dep_token) \ +_(attr, descending) \ +_(attr, destination) \ +_(attr, deterministic) \ +_(attr, device) \ +_(attr, device_index) \ +_(attr, dgrad_glu) \ +_(attr, diagonal) \ +_(attr, diagonals) \ +_(attr, dilation) \ +_(attr, dim) \ +_(attr, dim0) \ +_(attr, dim1) \ +_(attr, dim2) \ +_(attr, dimension) \ +_(attr, dims) \ +_(attr, dims_other) \ +_(attr, dims_self) \ +_(attr, divisor_override) \ +_(attr, downscale_factor) \ +_(attr, driver) \ +_(attr, dropout) \ +_(attr, dropout_mask) \ +_(attr, dropout_p) \ +_(attr, dropout_seed) \ +_(attr, dropout_state) \ +_(attr, dst) \ +_(attr, dtype) \ +_(attr, dual) \ +_(attr, dummy) \ +_(attr, dx) \ +_(attr, edge_order) \ +_(attr, eigenvalues) \ +_(attr, eigenvectors) \ +_(attr, eigvals) \ +_(attr, eigvecs) \ +_(attr, element) \ +_(attr, elements) \ +_(attr, ellipsis_idx) \ +_(attr, embed_dim) \ +_(attr, enable_gqa) \ +_(attr, end) \ +_(attr, end_dim) \ +_(attr, eps) \ +_(attr, epsilon) \ +_(attr, equal_nan) \ +_(attr, equation) \ +_(attr, exp_avg_sqs) \ +_(attr, exp_avgs) \ +_(attr, expand1) \ +_(attr, expand2) \ +_(attr, expand3) \ +_(attr, exponent) \ +_(attr, exponential_average_factor) \ +_(attr, fake_quant_enabled) \ +_(attr, fake_quant_on) \ +_(attr, ffn_bias_1) \ +_(attr, ffn_bias_2) \ +_(attr, ffn_weight_1) \ +_(attr, ffn_weight_2) \ +_(attr, filename) \ +_(attr, fill) \ +_(attr, fill_value) \ +_(attr, flat) \ +_(attr, forward) \ +_(attr, found_inf) \ +_(attr, from) \ +_(attr, from_) \ +_(attr, full) \ +_(attr, full_matrices) \ +_(attr, fuse_transform_0213) \ +_(attr, fweights) \ +_(attr, g) \ +_(attr, gO) \ +_(attr, generator) \ +_(attr, ggI) \ +_(attr, ggW) \ +_(attr, ggb) \ +_(attr, glu) \ +_(attr, grad) \ +_(attr, grad_bias) \ +_(attr, grad_cy) \ +_(attr, grad_factor) \ +_(attr, grad_glu) \ +_(attr, grad_hy) \ +_(attr, grad_in) \ +_(attr, grad_input) \ +_(attr, grad_input_mask) \ +_(attr, grad_out) \ +_(attr, grad_out_) \ +_(attr, grad_output) \ +_(attr, grad_scale) \ +_(attr, grad_w) \ +_(attr, grad_weight) \ +_(attr, grad_x) \ +_(attr, grad_y) \ +_(attr, gradient) \ +_(attr, grads) \ +_(attr, grid) \ +_(attr, group) \ +_(attr, groups) \ +_(attr, growth_interval) \ +_(attr, growth_tracker) \ +_(attr, half_to_float) \ +_(attr, has_bias) \ +_(attr, has_biases) \ +_(attr, hermitian) \ +_(attr, hidden_bias) \ +_(attr, hidden_gates) \ +_(attr, hidden_size) \ +_(attr, high) \ +_(attr, hist) \ +_(attr, hop_length) \ +_(attr, hx) \ +_(attr, hx_) \ +_(attr, hy_) \ +_(attr, i1) \ +_(attr, i2) \ +_(attr, i3) \ +_(attr, ignore_index) \ +_(attr, imag) \ +_(attr, impl_index) \ +_(attr, implicit) \ +_(attr, in_features) \ +_(attr, include_last_offset) \ +_(attr, include_self) \ +_(attr, increasing) \ +_(attr, ind) \ +_(attr, index) \ +_(attr, index_dtype) \ +_(attr, indexing) \ +_(attr, indices) \ +_(attr, info) \ +_(attr, initial) \ +_(attr, innerKTiles) \ +_(attr, inp) \ +_(attr, input) \ +_(attr, input1) \ +_(attr, input2) \ +_(attr, input3) \ +_(attr, input_bias) \ +_(attr, input_dtype) \ +_(attr, input_g) \ +_(attr, input_gates) \ +_(attr, input_lengths) \ +_(attr, input_scale) \ +_(attr, input_size) \ +_(attr, input_sizes) \ +_(attr, input_zero_point) \ +_(attr, inputs) \ +_(attr, interpolation) \ +_(attr, interpolation_mode) \ +_(attr, inv_scale) \ +_(attr, inverse) \ +_(attr, invert) \ +_(attr, invstd) \ +_(attr, is_causal) \ +_(attr, is_coalesced) \ +_(attr, is_crow) \ +_(attr, is_first_step) \ +_(attr, is_matrix) \ +_(attr, is_result) \ +_(attr, is_target) \ +_(attr, k) \ +_(attr, keepdim) \ +_(attr, kernel_size) \ +_(attr, key) \ +_(attr, label_smoothing) \ +_(attr, lambd) \ +_(attr, largest) \ +_(attr, last_dim_size) \ +_(attr, layersOutputs) \ +_(attr, layout) \ +_(attr, left) \ +_(attr, length) \ +_(attr, lengths) \ +_(attr, level) \ +_(attr, like) \ +_(attr, list) \ +_(attr, log_alpha) \ +_(attr, log_input) \ +_(attr, log_probs) \ +_(attr, log_target) \ +_(attr, logabsdet) \ +_(attr, logsumexp) \ +_(attr, low) \ +_(attr, lower) \ +_(attr, lr) \ +_(attr, lr_decay) \ +_(attr, ltm) \ +_(attr, m) \ +_(attr, mantissa) \ +_(attr, margin) \ +_(attr, mask) \ +_(attr, mask_check) \ +_(attr, mask_type) \ +_(attr, masked_grad) \ +_(attr, mat) \ +_(attr, mat1) \ +_(attr, mat1_meta) \ +_(attr, mat2) \ +_(attr, matrices) \ +_(attr, max) \ +_(attr, max_exp_avg_sqs) \ +_(attr, max_k) \ +_(attr, max_lengths) \ +_(attr, max_norm) \ +_(attr, max_q) \ +_(attr, max_seqlen) \ +_(attr, max_seqlen_k) \ +_(attr, max_seqlen_q) \ +_(attr, max_size) \ +_(attr, max_val) \ +_(attr, max_values) \ +_(attr, maximize) \ +_(attr, maximum_indices) \ +_(attr, maxnorm) \ +_(attr, mean) \ +_(attr, median) \ +_(attr, memory_format) \ +_(attr, meta) \ +_(attr, min) \ +_(attr, min_indices) \ +_(attr, min_seqlen) \ +_(attr, min_val) \ +_(attr, minlength) \ +_(attr, mode) \ +_(attr, momentum) \ +_(attr, momentum_buffer_list) \ +_(attr, n) \ +_(attr, n_bins) \ +_(attr, n_fft) \ +_(attr, names) \ +_(attr, nan) \ +_(attr, need_weights) \ +_(attr, neg_log_likelihood) \ +_(attr, negative) \ +_(attr, negative_slope) \ +_(attr, neginf) \ +_(attr, nested_size) \ +_(attr, nested_strides) \ +_(attr, nesterov) \ +_(attr, new_data) \ +_(attr, nnz) \ +_(attr, noise) \ +_(attr, non_blocking) \ +_(attr, norm) \ +_(attr, norm_bias_1) \ +_(attr, norm_bias_2) \ +_(attr, norm_first) \ +_(attr, norm_type) \ +_(attr, norm_weight_1) \ +_(attr, norm_weight_2) \ +_(attr, normalization) \ +_(attr, normalized) \ +_(attr, normalized_shape) \ +_(attr, nt_example) \ +_(attr, num_chunks) \ +_(attr, num_classes) \ +_(attr, num_generated) \ +_(attr, num_groups) \ +_(attr, num_head) \ +_(attr, num_heads) \ +_(attr, num_layers) \ +_(attr, num_parallel) \ +_(attr, num_samples) \ +_(attr, num_splits_key) \ +_(attr, num_weights) \ +_(attr, numel) \ +_(attr, observer_on) \ +_(attr, offs) \ +_(attr, offset) \ +_(attr, offset2bag) \ +_(attr, offsets) \ +_(attr, onesided) \ +_(attr, ord) \ +_(attr, order) \ +_(attr, other) \ +_(attr, out) \ +_(attr, out0) \ +_(attr, out1) \ +_(attr, out2) \ +_(attr, out3) \ +_(attr, out4) \ +_(attr, out5) \ +_(attr, out6) \ +_(attr, out_channel) \ +_(attr, out_dim) \ +_(attr, out_dtype) \ +_(attr, out_features) \ +_(attr, out_int32) \ +_(attr, outdim) \ +_(attr, output) \ +_(attr, output_mask) \ +_(attr, output_padding) \ +_(attr, output_scale) \ +_(attr, output_size) \ +_(attr, output_zero_point) \ +_(attr, p) \ +_(attr, packed) \ +_(attr, packed_hh) \ +_(attr, packed_ih) \ +_(attr, packed_weight) \ +_(attr, packed_weights) \ +_(attr, pad) \ +_(attr, pad_mode) \ +_(attr, padded) \ +_(attr, padding) \ +_(attr, padding_idx) \ +_(attr, padding_mode) \ +_(attr, padding_side) \ +_(attr, padding_value) \ +_(attr, params) \ +_(attr, path) \ +_(attr, pdist) \ +_(attr, per_row_fake_quant) \ +_(attr, per_sample_weights) \ +_(attr, periodic) \ +_(attr, philox_offset) \ +_(attr, philox_seed) \ +_(attr, physical_layout) \ +_(attr, pin_memory) \ +_(attr, pivot) \ +_(attr, pivots) \ +_(attr, plain_idx) \ +_(attr, plain_indices) \ +_(attr, pos_weight) \ +_(attr, posinf) \ +_(attr, positive) \ +_(attr, pow) \ +_(attr, prepend) \ +_(attr, primal) \ +_(attr, prob) \ +_(attr, proj_bias) \ +_(attr, proj_size) \ +_(attr, proj_weight) \ +_(attr, q) \ +_(attr, qGroupSize) \ +_(attr, qScale) \ +_(attr, qScaleAndZeros) \ +_(attr, qZeros) \ +_(attr, qkv) \ +_(attr, qkv_bias) \ +_(attr, qkv_weight) \ +_(attr, qtensor) \ +_(attr, quant_max) \ +_(attr, quant_min) \ +_(attr, quasi) \ +_(attr, query) \ +_(attr, r) \ +_(attr, ragged_idx) \ +_(attr, random_samples) \ +_(attr, range) \ +_(attr, rank) \ +_(attr, ratio) \ +_(attr, rcond) \ +_(attr, real) \ +_(attr, recipe_a) \ +_(attr, recipe_b) \ +_(attr, reduce) \ +_(attr, reduce_range) \ +_(attr, reduction) \ +_(attr, repeats) \ +_(attr, replacement) \ +_(attr, requires_grad) \ +_(attr, reserve) \ +_(attr, reserveSpace) \ +_(attr, reservedSpace) \ +_(attr, residuals) \ +_(attr, result) \ +_(attr, retain_graph) \ +_(attr, return_complex) \ +_(attr, return_counts) \ +_(attr, return_debug_mask) \ +_(attr, return_inverse) \ +_(attr, reverse) \ +_(attr, right) \ +_(attr, rng_state) \ +_(attr, rounding_mode) \ +_(attr, row) \ +_(attr, row_indices) \ +_(attr, rstd) \ +_(attr, rtol) \ +_(attr, running_max) \ +_(attr, running_mean) \ +_(attr, running_min) \ +_(attr, running_var) \ +_(attr, s) \ +_(attr, save_invstd) \ +_(attr, save_mean) \ +_(attr, save_var) \ +_(attr, save_var_transform) \ +_(attr, saved_g) \ +_(attr, saved_norms) \ +_(attr, saved_v) \ +_(attr, scalar) \ +_(attr, scalar1) \ +_(attr, scalar2) \ +_(attr, scalars) \ +_(attr, scale) \ +_(attr, scale_a) \ +_(attr, scale_b) \ +_(attr, scale_backoff_factor) \ +_(attr, scale_factors) \ +_(attr, scale_grad_by_freq) \ +_(attr, scale_growth_factor) \ +_(attr, scale_hh) \ +_(attr, scale_ih) \ +_(attr, scale_result) \ +_(attr, scales) \ +_(attr, scales_d) \ +_(attr, scales_h) \ +_(attr, scales_w) \ +_(attr, scales_zeros) \ +_(attr, sections) \ +_(attr, seed) \ +_(attr, self) \ +_(attr, self_is_result) \ +_(attr, self_num_batch_dims) \ +_(attr, self_or_result) \ +_(attr, self_sizes) \ +_(attr, seqlen_k) \ +_(attr, sequences) \ +_(attr, seqused_k) \ +_(attr, shape) \ +_(attr, shared) \ +_(attr, shared_storage_dqdkdv) \ +_(attr, shifts) \ +_(attr, side) \ +_(attr, sigma) \ +_(attr, sign) \ +_(attr, singular_values) \ +_(attr, size) \ +_(attr, sizes) \ +_(attr, skip_first) \ +_(attr, sobolstate) \ +_(attr, solution) \ +_(attr, some) \ +_(attr, sorted) \ +_(attr, sorted_sequence) \ +_(attr, sorter) \ +_(attr, source) \ +_(attr, spacing) \ +_(attr, sparse) \ +_(attr, sparse_dim) \ +_(attr, sparse_grad) \ +_(attr, split_k) \ +_(attr, split_k_mode) \ +_(attr, split_size) \ +_(attr, split_sizes) \ +_(attr, src) \ +_(attr, stable) \ +_(attr, start) \ +_(attr, start_dim) \ +_(attr, state_steps) \ +_(attr, state_sums) \ +_(attr, std) \ +_(attr, step) \ +_(attr, steps) \ +_(attr, storage_offset) \ +_(attr, stride) \ +_(attr, sum_S) \ +_(attr, sum_dy) \ +_(attr, sum_dy_xmu) \ +_(attr, sumdim) \ +_(attr, swap) \ +_(attr, swizzle_a) \ +_(attr, swizzle_b) \ +_(attr, symmetric_quant) \ +_(attr, t) \ +_(attr, tangent) \ +_(attr, target) \ +_(attr, target_lengths) \ +_(attr, targets) \ +_(attr, tau) \ +_(attr, tensor) \ +_(attr, tensor1) \ +_(attr, tensor2) \ +_(attr, tensor_indices_or_sections) \ +_(attr, tensors) \ +_(attr, tensors1) \ +_(attr, test_element) \ +_(attr, test_elements) \ +_(attr, the_template) \ +_(attr, theta) \ +_(attr, thread_masks) \ +_(attr, threshold) \ +_(attr, to) \ +_(attr, tol) \ +_(attr, total) \ +_(attr, total_L) \ +_(attr, total_length) \ +_(attr, total_weight) \ +_(attr, train) \ +_(attr, training) \ +_(attr, transpose) \ +_(attr, transpose_result) \ +_(attr, transposed) \ +_(attr, type1) \ +_(attr, type2) \ +_(attr, unbiased) \ +_(attr, unitriangular) \ +_(attr, unpack_data) \ +_(attr, unpack_pivots) \ +_(attr, unroll_dim) \ +_(attr, unsafe) \ +_(attr, unused) \ +_(attr, update) \ +_(attr, upper) \ +_(attr, upscale_factor) \ +_(attr, use_cutlass) \ +_(attr, use_fast_accum) \ +_(attr, use_gelu) \ +_(attr, use_input_stats) \ +_(attr, v) \ +_(attr, value) \ +_(attr, values) \ +_(attr, var) \ +_(attr, vec) \ +_(attr, vec1) \ +_(attr, vec2) \ +_(attr, w_hh) \ +_(attr, w_ih) \ +_(attr, weight) \ +_(attr, weight0) \ +_(attr, weight1) \ +_(attr, weight2) \ +_(attr, weight3) \ +_(attr, weight4) \ +_(attr, weight_arr) \ +_(attr, weight_buf) \ +_(attr, weight_decay) \ +_(attr, weight_g) \ +_(attr, weight_scale) \ +_(attr, weight_stride0) \ +_(attr, weight_zero_point) \ +_(attr, weights) \ +_(attr, win_length) \ +_(attr, window) \ +_(attr, window_length) \ +_(attr, window_size) \ +_(attr, window_size_left) \ +_(attr, window_size_right) \ +_(attr, with_replacement) \ +_(attr, workspace) \ +_(attr, wrap) \ +_(attr, x) \ +_(attr, x1) \ +_(attr, x2) \ +_(attr, y) \ +_(attr, z) \ +_(attr, z_state) \ +_(attr, zero_infinity) \ +_(attr, zero_point) \ +_(attr, zero_point_hh) \ +_(attr, zero_point_ih) \ +_(attr, zero_points) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/blob.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/blob.h new file mode 100644 index 0000000000000000000000000000000000000000..f2e5e419a26260fdc2effdb8a2e93707f1ebc49a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/blob.h @@ -0,0 +1,209 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace caffe2 { + +class Tensor; + +/** + * @brief Blob is a general container that hosts a typed pointer. + * + * A Blob hosts a pointer as well as its type, and takes charge of deleting it + * properly when the blob is deallocated or re-allocated with a new type. A blob + * could contain anything, although the most common case is to contain a Tensor. + */ +class TORCH_API Blob final : public c10::intrusive_ptr_target { + public: + /** + * Initializes an empty Blob. + */ + Blob() noexcept = default; + ~Blob() override { + Reset(); + } + + Blob(Blob&& other) noexcept : Blob() { + swap(other); + } + + Blob& operator=(Blob&& other) noexcept { + Blob(std::move(other)).swap(*this); + return *this; + } + + /** + * Checks if the content stored in the blob is of type T. + */ + template + bool IsType() const noexcept { + return meta_.Match(); + } + + /** + * Returns the meta info of the blob. + */ + const TypeMeta meta() const noexcept { + return meta_; + } + + /** + * Returns a printable typename of the blob. + */ + std::string_view TypeName() const noexcept { + return meta_.name(); + } + + /** + * @brief Gets the const reference of the stored object. The code checks if + * the stored object is of the desired type. + */ + // TODO(jerryzh): add a Get(c10::DeviceType) function? + template + const T& Get() const { + TORCH_INTERNAL_ASSERT( + IsType(), + "wrong type for the Blob instance. Blob contains ", + meta_.name(), + " while caller expects ", + TypeMeta::TypeName()); + // TODO: after we add Get(c10::DeviceType) + // and changed all the callsites, we can add + // a static assert here to enforce T != Tensor + return *static_cast(pointer_); + } + + const void* GetRaw() const noexcept { + return pointer_; + } + void* GetRaw() noexcept { + return pointer_; + } + + /** + * @brief Gets a mutable pointer to the stored object. + * + * If the current object is not of the right type, a new object is created + * and the old object is freed. Note that type T should have a default + * constructor. Otherwise, create the object yourself first, and use + * Reset(). + */ + template + T* GetMutable() { + static_assert( + std::is_default_constructible_v, + "GetMutable can't be called with non-default-constructible types. " + "Try using specialized methods"); + if (IsType()) { + return static_cast(pointer_); + } else { + // TODO Re-enable logging + // VLOG(1) << "Create new mutable object " << TypeMeta::TypeName(); + return Reset(new T()); + } + } + + template + T* GetMutableOrNull() { + if (IsType()) { + return static_cast(pointer_); + } else { + return nullptr; + } + } + + /** + * Sets the underlying object to the allocated one. The Blob then takes over + * the ownership of the passed in pointer. If there is already an object in + * the Blob, the old object is freed. + * + * This is used when the underlying class T does not have a default ctor, or + * complex initializations needs to be done outside the blob. + */ + template + T* Reset(T* allocated) { + free_(); + meta_ = TypeMeta::Make(); + pointer_ = static_cast(allocated); + has_ownership_ = true; + return allocated; + } + + /** + * Sets the underlying object to the allocated one, but does not take over + * the ownership of the passed in pointer. If there is already an object in + * the Blob, the old object is freed. + * + * Unlike Reset, this does not take over the ownership of the pointer and the + * caller is responsible for making sure that the lifetime of the allocated + * blob outlasts the lifetime of any access to this blob, until another Reset + * call is made or the blob is destructed. + */ + template + std::remove_const_t* ShareExternal( + std::remove_const_t* allocated) { + return static_cast(ShareExternal( + static_cast(allocated), + TypeMeta::Make>())); + } + + void* ShareExternal(void* allocated, const TypeMeta meta) { + free_(); + meta_ = meta; + pointer_ = allocated; + has_ownership_ = false; + return allocated; + } + + /** + * Resets the Blob to an empty one. + */ + void Reset() { + free_(); + pointer_ = nullptr; + meta_ = TypeMeta(); + has_ownership_ = false; + } + + /** + * @brief Swaps the underlying storage of two blobs. + */ + void swap(Blob& rhs) noexcept { + using std::swap; + swap(meta_, rhs.meta_); + swap(pointer_, rhs.pointer_); + swap(has_ownership_, rhs.has_ownership_); + } + + private: + void free_() { + if (has_ownership_ && pointer_ != nullptr) { + (*meta_.deleteFn())(pointer_); + } + } + + TypeMeta meta_; + void* pointer_{nullptr}; + bool has_ownership_{false}; + + C10_DISABLE_COPY_AND_ASSIGN(Blob); +}; + +inline void swap(Blob& lhs, Blob& rhs) noexcept { + lhs.swap(rhs); +} + +inline std::ostream& operator<<(std::ostream& out, const Blob& v) { + return out << "Blob[" << v.TypeName() << ']'; +} + +} // namespace caffe2 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/builtin_function.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/builtin_function.h new file mode 100644 index 0000000000000000000000000000000000000000..2660a2f88cd6eadb124d31c7e45c3ac84560d130 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/builtin_function.h @@ -0,0 +1,95 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +struct BuiltinOpFunction : public Function { + BuiltinOpFunction( + c10::QualifiedName qualname, + c10::FunctionSchema schema, + std::function callable, + std::string doc_string = "") + : name_(std::move(qualname)), + callable_(std::move(callable)), + schema_(std::move(schema)), + doc_string_(std::move(doc_string)) { + TORCH_INTERNAL_ASSERT(schema_.returns().size() == 1); + } + + std::string_view doc_string() const override { + return doc_string_; + } + + void run(Stack& stack) override { + callable_(stack); + } + + c10::intrusive_ptr runAsync( + Stack& stack, + TaskLauncher /* not used */) override { + run(stack); + auto res = c10::make_intrusive(stack.front().type()); + res->markCompleted(std::move(stack.front())); + return res; + } + + const c10::QualifiedName& qualname() const override { + return name_; + } + + // if this isn't yet defined, run its method_creator function + void ensure_defined() override { + // nop + } + + const c10::FunctionSchema& getSchema() const override { + return schema_; + } + + size_t num_inputs() const override { + return schema_.arguments().size(); + } + + Function& setSchema(c10::FunctionSchema schema) override { + schema_ = std::move(schema); + return *this; + } + + bool call( + Stack& stack, + std::optional /*unused*/, + c10::function_ref /*unused*/) override { + run(stack); + return false; + } + + bool call(Stack& stack, c10::function_ref /*unused*/) + override { + run(stack); + return false; + } + + ~BuiltinOpFunction() override = default; + + private: + c10::QualifiedName name_; + + std::function callable_; + + c10::FunctionSchema schema_; + + std::string doc_string_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/class_type.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/class_type.h new file mode 100644 index 0000000000000000000000000000000000000000..484afef253a21957ffcf503bfb0c8f95f27519b0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/class_type.h @@ -0,0 +1,446 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + + +namespace torch::jit { +struct CompilationUnit; +struct Function; +} // namespace torch::jit + + +namespace c10 { + +struct FunctionSchema; + +// This enumerator represents the 'kind' of an attribute - a buffer, a parameter, or neither. +// This state is mutually exclusive. Buffers and Parameters can only appear on modules. +enum class AttributeKind { + BUFFER, + PARAMETER, + REGULAR_ATTRIBUTE +}; + +// This structure represents all notional booking entities in a class attribute: name, kind (see: AttributeKind), and type (see: TypePtr). +// Note: This structure does not represent the value of the attribute. +struct TORCH_API ClassAttribute { + public: + ClassAttribute(AttributeKind kind, + TypePtr attributeType, + std::string attributeName) : + kind_(kind), + attributeType_(std::move(attributeType)), + attributeName_(std::move(attributeName)) {} + + AttributeKind getKind() const { + return kind_; + } + + const TypePtr& getType() const { + return attributeType_; + } + + const std::string& getName() const { + return attributeName_; + } + + private: + AttributeKind kind_; + TypePtr attributeType_; + std::string attributeName_; +}; + +/** + * User Defined Types + */ + +struct ClassType; +using ClassTypePtr = std::shared_ptr; +using ::torch::jit::CompilationUnit; + +// This represents a class in TorchScript. +struct TORCH_API ClassType : public NamedType { + // This represents an attribute of a class; a name associated with an attribute, and a + // getter and (optional) setter for that attribute. + struct Property { + std::string name; + torch::jit::Function* getter; + torch::jit::Function* setter; + }; + + // Create a class type with name `name` and its methods stored in `cu`. + static ClassTypePtr create( + std::optional qualifiedName, + std::weak_ptr cu, + bool is_module = false, + std::string doc_string = "", + std::vector unresolved_class_attributes = {}); + + bool equals(const Type& rhs) const override { + if (this == &rhs) { + return true; + } + if (auto user_rhs = rhs.castRaw()) { + const auto& lhs_name = name(); + const auto& rhs_name = user_rhs->name(); + return lhs_name.has_value() && lhs_name == rhs_name && + this->compilation_unit() == user_rhs->compilation_unit(); + } + return false; + } + + std::string str() const override { + return annotation_str(); + } + + std::string repr_str() const override { + std::stringstream ss; + ss << str() + << " (of Python compilation unit at: " << compilation_unit().get() << ')'; + return ss.str(); + } + + const std::vector& methods() const; + + TypePtr findAttribute(const std::string& name) const { + size_t pos = 0; + for (const auto& attr : attributes_) { + if (name == attr.getName()) { + break; + } + ++pos; + } + + if (pos >= attributes_.size()) { + return nullptr; + } + return attributes_[pos].getType(); + } + + const TypePtr& getAttribute(const std::string& name) const { + auto slot = findAttributeSlot(name); + TORCH_CHECK( + slot, + repr_str(), + " does not have an attribute with name '", + name, + "'"); + return attributes_[*slot].getType(); + } + + size_t numAttributes() const { + return attributes_.size(); + } + + const TypePtr& getAttribute(size_t slot) const { + AT_ASSERT(slot < attributes_.size()); + return attributes_.at(slot).getType(); + } + + const std::string getAttributeName(size_t slot) const { + AT_ASSERT(slot < attributes_.size()); + return attributes_[slot].getName(); + } + + void checkNotExist(const std::string& name, const std::string& what) const; + + // Attributes are stored in a specific slot at runtime for efficiency. + // When emitting instructions we specify the slot so that attribute access is + // a constant lookup + std::optional findAttributeSlot(const std::string& name) const { + size_t slot = 0; + for (const auto& attr : attributes_) { + if (name == attr.getName()) { + return slot; + } + slot++; + } + return std::nullopt; + } + size_t getAttributeSlot(const std::string& name) const { + if (auto r = findAttributeSlot(name)) { + return *r; + } + TORCH_CHECK( + false, + repr_str(), + " does not have an attribute with name '", + name, + "'"); + } + + bool hasAttribute(const std::string& name) const { + return std::find_if( + attributes_.cbegin(), + attributes_.cend(), + [&](const ClassAttribute& attr) { return attr.getName() == name; }) != + attributes_.cend(); + } + + bool isUnresolvedClassAttribute(const std::string& name) const; + + at::ArrayRef containedTypes() const override { + return attributeTypes_; + } + + size_t addAttribute( + const std::string& name, + TypePtr type, + bool is_parameter = false, + bool is_buffer = false); + + // [Internal Only] Remove attribute from the ClassType, + // caller is responsible to make sure the modification is safe: + // it is unsafe to having existing allocations + // of this object around anymore, and any code that works on + // the attribute is now invalid. Only newly created code is + // valid again. + void unsafeRemoveAttribute(const std::string& name); + + // [Internal Only] Change the type of an attribute of the ClassType, + // The caller is responsible to make sure the modification is safe: + // it is unsafe to maintain uses of the old type of the attribute, + // and any code that works on the attribute is now invalid. + // Only newly created code is valid again. + void unsafeChangeAttributeType(const std::string& name, const TypePtr& new_ty); + + // Add attribute \p NAME if it doesn't exist or verify that it has a + // compatible type otherwise. + size_t addOrCheckAttribute( + const std::string& name, + TypePtr ty, + bool is_parameter = false, + bool is_buffer = false) { + auto slot_idx = findAttributeSlot(name); + if (!slot_idx) { + return addAttribute(name, std::move(ty), is_parameter, is_buffer); + } + + TORCH_CHECK( + is_parameter == this->is_parameter(*slot_idx), + "Parameter field mismatch for the field '", + name, + "'"); + const TypePtr& atype = getAttribute(*slot_idx); + TORCH_CHECK( + ty->isSubtypeOf(*atype), + ty->repr_str(), + " is not compatible with the type ", + atype->repr_str(), + " for the field '", + name, + "'"); + return *slot_idx; + } + + // Get the property with the given \p name, if it exists on the class. + std::optional getProperty(const std::string& name); + // Add a property named \p name with \p getter and \p setter as its getter and setter. + void addProperty(const std::string& name, torch::jit::Function* getter, torch::jit::Function* setter); + // Get a list of all properties. + const std::vector& properties() const { + return properties_; + } + + bool hasConstant(const std::string& name) const { + return std::find_if( + constantNames_.cbegin(), + constantNames_.cend(), + [&](const std::string& constant) { return constant == name; }) != + constantNames_.cend(); + } + + size_t addConstant(const std::string& name, const IValue& value); + + std::optional findConstantSlot(const std::string& name) const; + + size_t getConstantSlot(const std::string& name) const { + if (auto r = findConstantSlot(name)) { + return *r; + } + TORCH_CHECK( + false, + repr_str(), + " does not have constant field with the name '", + name, + "'"); + } + + const std::string& getConstantName(size_t slot) const; + + const std::string& doc_string() const { + return doc_string_; + } + + IValue getConstant(const std::string& name) const; + + IValue getConstant(size_t slot) const; + + std::optional findConstant(const std::string& name) const; + + size_t numConstants() const; + + at::ArrayRef constantNames() const { + return constantNames_; + } + + at::ArrayRef constantValues() const; + + // [Internal Only] Remove constant from the ClassType + // caller is responsible to make sure the modification is safe: + // it is unsafe to having existing allocations + // of this object around anymore, and any code that works on + // the attribute is now invalid. Only newly created code is + // valid again. + void unsafeRemoveConstant(const std::string& name); + + TypePtr createWithContained(std::vector contained_types) const override { + auto ptr = ClassType::create(name(), compilation_unit_, is_module()); + AT_ASSERT(numAttributes() == contained_types.size()); + for(size_t i = 0; i < attributes_.size(); ++i) { + AT_ASSERT(attributes_[i].getType()->isSubtypeOf(*contained_types[i])); + ptr->addAttribute(attributes_[i].getName(), std::move(contained_types[i])); + } + // Copy methods over + for (const auto& method : methods()) { + ptr->addMethod(method); + } + return ptr; + } + + bool is_module() const override { + return isModule_; + } + + const std::vector& getAttributes() const { + return attributes_; + } + + bool is_parameter(size_t slot) const { + TORCH_INTERNAL_ASSERT( + is_module(), "asking for parameterSlots of non-Module"); + return attributes_.at(slot).getKind() == AttributeKind::PARAMETER; + } + + bool is_buffer(size_t slot) const { + TORCH_INTERNAL_ASSERT( + is_module(), "asking for bufferWrittenSlots of non-Module"); + return attributes_.at(slot).getKind() == AttributeKind::BUFFER; + } + + void addForwardPreHook(torch::jit::Function* pre_hook_ptr); + void addForwardHook(torch::jit::Function* hook_ptr); + torch::jit::Function* findForwardPreHook(const std::string& name) const; + torch::jit::Function* findForwardHook(const std::string& name) const; + const std::vector& getForwardHooks() const; + const std::vector& getForwardPreHooks() const; + + void checkForwardPreHookSchema( + size_t pre_hook_idx, + const FunctionSchema& pre_hook_schema) const; + void checkForwardHookSchema( + size_t hook_idx, + const FunctionSchema& hook_schema) const; + + void addMethod(torch::jit::Function* method); + torch::jit::Function* findMethod(const std::string& name) const; + torch::jit::Function& getMethod(const std::string& name) const; + torch::jit::Function* findHook(const std::string& name) const; + torch::jit::Function& getHook(const std::string& name) const; + bool hasMethod(const std::string& name) const; + + torch::jit::Function* findStaticMethod(const std::string& name) const; + void addStaticMethod(torch::jit::Function* method); + + // [Internal Only] Remove method from the ClassType + // caller is responsible to make sure the modification is safe: + // it is unsafe to having existing allocations + // of this object around anymore, and any code that works on + // the attribute is now invalid. Only newly created code is + // valid again. + // Note this method is intended for freezing only. + void unsafeRemoveMethod(const std::string& name); + + std::shared_ptr compilation_unit(); + + std::shared_ptr compilation_unit() const; + + // generate a refined version of this class. + // It has the same name but the slot Types are subtypes of + // the original slots. It is only valid to refine a class type in a context + // where it is know that there are not assignments to the objects slots + // that would invalidate the refinement. + // These variants are not registered in the global class table. + ClassTypePtr refine(at::ArrayRef refined_slots) const; + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + static const TypeKind Kind = TypeKind::ClassType; + + private: + ClassType( + std::optional name, + std::weak_ptr cu, + bool is_module = false, + std::string doc_string = "", + std::vector unresolved_class_attributes = {}); + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name()->qualifiedName(); + } + + void addAttribute(ClassAttribute classAttribute); + std::string getForwardPreHookErrorMessage(size_t pre_hook_idx) const; + std::string getForwardHookErrorMessage(size_t hook_idx) const; + + // Mapping of attribute names -> their type. + // NOTE: this does not contain methods, which are stored in the module + // TODO: once modules support arbitrary ivalue attributes, we don't need this + // anymore. + // TODO: This is better represented as an OrderedDict, but alas it is not yet + // available from c10 + + // Mapping of constant names -> their value. + std::vector constantNames_; + std::vector constantValues_; + // Holds method attributes + std::weak_ptr compilation_unit_; + + // Holds all attributes, attribute details are found on ClassAttribute + std::vector attributes_; + // Construct mirroring attributes_, only around due to the fact that `containedTypes()` method returns an ArrayRef. + // Never fill this without using the appropriate provideNewClassAttribute method + std::vector attributeTypes_; + + // List of methods associated with this class. + std::vector methods_; + std::vector staticmethods_; + + // List of hooks to be run before/after forward. + std::vector forward_hooks_; + std::vector forward_pre_hooks_; + + // List of properties exposed by this class. + std::vector properties_; + + bool isModule_ = false; + + // Doc string of class. + std::string doc_string_; + + // For error reporting accesses to class level attributes. + std::vector unresolved_class_attributes_; +}; + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/custom_class.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/custom_class.h new file mode 100644 index 0000000000000000000000000000000000000000..2811256fb352bb7bf2cfe3fac74c5db69e932319 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/custom_class.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace c10 { + +struct ClassType; +using ClassTypePtr = std::shared_ptr; + +TORCH_API c10::ClassTypePtr getCustomClassTypeImpl(const std::type_index &tindex); + +template +const c10::ClassTypePtr& getCustomClassType() { + // Classes are never unregistered from getCustomClassTypeMap and the + // hash lookup can be a hot path, so just cache. + // For the same reason, it's fine If this ends up getting duplicated across + // DSO boundaries for whatever reason. + static c10::ClassTypePtr cache = getCustomClassTypeImpl( + std::type_index(typeid(T))); + return cache; +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/dynamic_type.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/dynamic_type.h new file mode 100644 index 0000000000000000000000000000000000000000..9d44d9ae8df7de333932d10e5921953e78491418 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/dynamic_type.h @@ -0,0 +1,252 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace c10 { + +using DynamicTypeBits = std::uint32_t; +#define DYNAMIC_TYPE_BIT(x) (1u << x) + +constexpr DynamicTypeBits kDynamicCovariantTypeBit = DYNAMIC_TYPE_BIT(31); +constexpr DynamicTypeBits kDynamicAnyTypeBit = DYNAMIC_TYPE_BIT(30); + +constexpr DynamicTypeBits kDynamicNoneTypeBit = DYNAMIC_TYPE_BIT(1); +constexpr DynamicTypeBits kDynamicIntTypeBit = DYNAMIC_TYPE_BIT(3); +constexpr DynamicTypeBits kDynamicFloatTypeBit = DYNAMIC_TYPE_BIT(4); +constexpr DynamicTypeBits kDynamicComplexTypeBit = DYNAMIC_TYPE_BIT(5); +constexpr DynamicTypeBits kDynamicListTypeBit = DYNAMIC_TYPE_BIT(7); +constexpr DynamicTypeBits kDynamicTupleTypeBit = DYNAMIC_TYPE_BIT(8); +constexpr DynamicTypeBits kDynamicClassTypeBit = DYNAMIC_TYPE_BIT(10); + +#define FORALL_DYNAMIC_TYPES(_) \ + _(Tensor, DYNAMIC_TYPE_BIT(0), 1) \ + _(None, kDynamicNoneTypeBit, 1) \ + _(Bool, DYNAMIC_TYPE_BIT(2), 1) \ + _(Int, kDynamicIntTypeBit, 1) \ + _(Float, kDynamicFloatTypeBit, 1) \ + _(Complex, kDynamicComplexTypeBit, 1) \ + _(Number, \ + (kDynamicIntTypeBit | kDynamicFloatTypeBit | kDynamicComplexTypeBit), \ + 1) \ + _(String, DYNAMIC_TYPE_BIT(6), 1) \ + _(List, kDynamicListTypeBit, 0) \ + _(Tuple, (kDynamicTupleTypeBit | kDynamicCovariantTypeBit), 0) \ + _(Dict, DYNAMIC_TYPE_BIT(9), 0) \ + _(Class, kDynamicClassTypeBit, 0) \ + _(Optional, \ + (DYNAMIC_TYPE_BIT(11) | kDynamicNoneTypeBit | kDynamicCovariantTypeBit), \ + 0) \ + _(AnyList, (kDynamicListTypeBit | kDynamicAnyTypeBit), 1) \ + _(AnyTuple, \ + (kDynamicTupleTypeBit | kDynamicCovariantTypeBit | kDynamicAnyTypeBit), \ + 1) \ + _(DeviceObj, DYNAMIC_TYPE_BIT(12), 1) \ + _(StreamObj, DYNAMIC_TYPE_BIT(13), 1) \ + _(Capsule, DYNAMIC_TYPE_BIT(14), 1) \ + _(Generator, DYNAMIC_TYPE_BIT(15), 1) \ + _(Storage, DYNAMIC_TYPE_BIT(16), 1) \ + _(Var, DYNAMIC_TYPE_BIT(17), 0) \ + _(AnyClass, (kDynamicClassTypeBit | kDynamicAnyTypeBit), 1) \ + _(QScheme, DYNAMIC_TYPE_BIT(18), 1) \ + _(Quantizer, DYNAMIC_TYPE_BIT(19), 1) \ + _(AnyEnum, DYNAMIC_TYPE_BIT(20), 1) \ + _(RRef, DYNAMIC_TYPE_BIT(21), 0) \ + _(Future, DYNAMIC_TYPE_BIT(22), 0) \ + _(Await, DYNAMIC_TYPE_BIT(23), 0) \ + _(Any, 0xffffffff, 1) + +#define FORALL_DYNAMIC_TYPES_FAKE(_) \ + _(ScalarType, kDynamicIntTypeBit, 1) \ + _(Layout, kDynamicIntTypeBit, 1) \ + _(SymInt, kDynamicIntTypeBit, 1) \ + _(SymBool, kDynamicIntTypeBit, 1) \ + _(MemoryFormat, kDynamicIntTypeBit, 1) + +#define FORWARD_DECL_TYPE(NAME, _, __) struct NAME ## Type; + FORALL_DYNAMIC_TYPES(FORWARD_DECL_TYPE) + FORALL_DYNAMIC_TYPES_FAKE(FORWARD_DECL_TYPE) +#undef FORWARD_DECL_TYPE + +class DynamicType; +using DynamicTypePtr = std::shared_ptr; + +/** + * DynamicType is designed as a low dependency type system for TorchScript. The + * existing JIT types are used for both compilation and runtime, which makes + * sense for server contexts because we often compile and run the model in + * the same process, however this doesn't hold for mobile devices where we + * always compiles a model ahead of time, therefore there will be dependencies + * which are not needed, but built with mobile runtime causing binary size + * bloat, by design. Every basic type like Int, Bool or String will bring their + * vtable, typeinfo, constructor, destructor and even more data from their + * specializations for STL types to the binary causing a long tail bloat. + * + * The core problem is about the complexity to implement and maintain a single + * type system for both analysis and execution purposes. Although they should + * have the exactly same semantics, in practice implement a unified abstraction + * adds conceptual and representational overhead for both sides of the world. + * + * To address the issues, DynamicType implements a minimal subset of JIT types + * and uses a generic algorithm to test all subtyping relations. To achieve + * this, we assign each dynamic type a single integer tag to represent its + * semantics. More specifically, a dynamic type is defined as a set of "control + * bits" and "data bits", where control bits describe the special behavior when + * testing a type and data bits map to identity of each nominal type. We use bit + * operations to perform all the tests. + * + * For example, a "covariant bit" is a control bit used to describe if a type + * is covariant, right now the most used one is tuple type, and in addition to + * the control bit, tuple type's data bit is the 8th bit from the LSB. Control + * bits start from MSB and data bits start from LSB. + * + * If two types are equal, then they are subtype of each other, also if the bits + * from one type tag is subset of the other tag, it automatically becomes a + * subtype of the other. This simplifies the subtyping logic a lot, and over the + * long term it is possible to adopt this scheme on the server side as well. + * Special cases can be added but they generally should not take too much code + * size. + * + * DynamicType may or may not inherit from c10::Type because it's not the core + * requirement of DynamicType to interface with existing JIT types, but we might + * want to inherit from c10::Type to reduce the migration cost. + */ +class DynamicType : public SharedType { + using ClassTypePtr = std::shared_ptr; + + /** + * A implementation detail to support NamedTuple. + */ + struct LabeledDynamicType { + std::optional label; + DynamicTypePtr ty; + explicit LabeledDynamicType(DynamicTypePtr t) : ty(std::move(t)) {} + + bool equals(const LabeledDynamicType& other) const; + bool isSubtypeOf(const LabeledDynamicType& other) const; + }; + + public: + // TODO Change Ptr to DynamicTypePtr when all migrations are done. + using Ptr = TypePtr; + using ElementType = DynamicType; + ~DynamicType() override; + + struct Arguments { + Arguments() = default; + Arguments(c10::ArrayRef /*args*/); + Arguments(const std::vector& /*names*/, c10::ArrayRef /*args*/); + std::vector elems; + }; + + enum class Tag : DynamicTypeBits { +#define DYNAMIC_TYPE_ITEM(NAME, VAL, _) NAME = VAL, + FORALL_DYNAMIC_TYPES(DYNAMIC_TYPE_ITEM) + FORALL_DYNAMIC_TYPES_FAKE(DYNAMIC_TYPE_ITEM) +#undef DYNAMIC_TYPE_ITEM + }; + + bool equals(const Type& rhs) const override; + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + std::string str() const override; + static const TypeKind Kind = TypeKind::DynamicType; + static TORCH_API DynamicTypePtr create(Type& ty); + + explicit DynamicType(Tag /*tag*/, Arguments /*arguments*/); + explicit DynamicType(Tag /*tag*/, std::string_view /*name*/, Arguments /*arguments*/); + + DynamicType(DynamicType&& other) = delete; + DynamicType(const DynamicType&) = delete; + DynamicType& operator=(const DynamicType&) = delete; + DynamicType& operator=(DynamicType&&) = delete; + + TypePtr containedType(size_t /*i*/) const override; + size_t containedTypeSize() const override; + Tag tag() const { + return tag_; + } + const std::optional& name() const { + return name_; + } + const Arguments& arguments() const { + return arguments_; + } + TORCH_API TypeKind dynamicKind() const; + + // Should be used only on the server side to restore static type information. +#ifndef C10_MOBILE + TORCH_API +#endif + TypePtr fallback() const; + + private: + bool symmetric() const override { + return false; + } + friend struct Type; + // NOTE: Here we are using SingletonOrSharedTypePtr to mean + // "original-type-because-it-was-actually-a-DynamicType or shared". + static SingletonOrSharedTypePtr create(const Type& ty); + DynamicType(const Type& other); + bool equals(const DynamicType& other) const; + + template + bool compareArguments(const DynamicType& other, const F& f) const { + if (arguments_.elems.size() != other.arguments_.elems.size()) { + return false; + } + for (size_t i = 0; i < arguments_.elems.size(); i++) { + if (!f(arguments_.elems[i], other.arguments_.elems[i])) { + return false; + } + } + return true; + } + + Tag tag_; + std::optional name_; + union { + Arguments arguments_; + ClassTypePtr class_; + }; +}; + +template +struct DynamicTypeTrait { + C10_NOINLINE static auto tagValue() { + TORCH_CHECK(false); + return DynamicType::Tag::Any; + } +}; + +namespace detail { +C10_NOINLINE DynamicTypePtr makeBaseType(DynamicType::Tag tag); +} + +#define DYNAMIC_TYPE_TAG_VALUE(NAME, _, IS_BASE_TYPE) \ + template <> \ + struct TORCH_API DynamicTypeTrait { \ + C10_ERASE static auto tagValue() { \ + return DynamicType::Tag::NAME; \ + } \ + static constexpr bool isBaseType = IS_BASE_TYPE; \ + template \ + static std::enable_if_t getBaseType() { \ + static auto type = detail::makeBaseType(tagValue()); \ + return type; \ + } \ + }; // namespace c10 +FORALL_DYNAMIC_TYPES(DYNAMIC_TYPE_TAG_VALUE) +FORALL_DYNAMIC_TYPES_FAKE(DYNAMIC_TYPE_TAG_VALUE) +#undef DYNAMIC_TYPE_TAG_VALUE + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/enum_tag.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/enum_tag.h new file mode 100644 index 0000000000000000000000000000000000000000..a5f38f49d2638be21cd7e99cfcec402577afc65f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/enum_tag.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from enum_tag.h + +namespace at { + // Enum of valid tags obtained from the entries in tags.yaml + enum class Tag { + core, + cudagraph_unsafe, + data_dependent_output, + dynamic_output_shape, + flexible_layout, + generated, + inplace_view, + maybe_aliasing_or_mutating, + needs_contiguous_strides, + needs_exact_strides, + needs_fixed_stride_order, + nondeterministic_bitwise, + nondeterministic_seeded, + pointwise, + pt2_compliant_tag, + reduction, + view_copy + }; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/enum_type.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/enum_type.h new file mode 100644 index 0000000000000000000000000000000000000000..3c67209920ef6135e8f6ff0b54260e8c8631aaef --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/enum_type.h @@ -0,0 +1,109 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace c10 { + +struct EnumType; +using EnumTypePtr = std::shared_ptr; +using EnumNameValue = std::pair; +struct TORCH_API EnumType : public NamedType { + friend struct Type; + static const TypeKind Kind = TypeKind::EnumType; + + static EnumTypePtr create( + const c10::QualifiedName& qualified_class_name, + TypePtr value, + std::vector enum_names_values, + std::weak_ptr<::torch::jit::CompilationUnit> cu) { + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") + switch (value->kind()) { + case TypeKind::IntType: + case TypeKind::FloatType: + case TypeKind::StringType: + return EnumTypePtr(new EnumType( + qualified_class_name, + std::move(value), + std::move(enum_names_values), + std::move(cu))); + default: + TORCH_CHECK( + false, + "Cannot create Enum with value type '", + value->str(), + "', only int, float and string are supported"); + } + C10_DIAGNOSTIC_POP() + } + + std::string str() const override { + return "Enum<" + annotation_str() + ">"; + } + + std::string repr_str() const override { + return str(); + } + + const TypePtr& getValueType() const { + return value_type_; + } + + bool equals(const Type& rhs) const override { + if (auto* enum_rhs = rhs.castRaw()) { + return name().has_value() && name() == enum_rhs->name() && + *getValueType() == *(enum_rhs->getValueType()) && + this->compilation_unit() == enum_rhs->compilation_unit(); + } + return false; + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + std::shared_ptr compilation_unit() + const { + auto cu = cu_.lock(); + return cu; + } + + const QualifiedName& qualifiedClassName() const { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name().value(); + } + + at::ArrayRef containedTypes() const override { + return value_type_; + } + + const at::ArrayRef enumNamesValues() const { + return enum_names_values_; + } + + private: + EnumType( + c10::QualifiedName qualified_class_name, + TypePtr value_type, + std::vector enum_names_values, + std::weak_ptr cu) + : NamedType(TypeKind::EnumType, std::move(qualified_class_name)), + value_type_(std::move(value_type)), + enum_names_values_(std::move(enum_names_values)), + cu_(std::move(cu)) {} + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return qualifiedClassName().qualifiedName(); + } + + TypePtr value_type_; + std::vector enum_names_values_; + std::weak_ptr<::torch::jit::CompilationUnit> cu_; +}; + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function.h new file mode 100644 index 0000000000000000000000000000000000000000..255bb8715de7e9fbf7cafad7dcc9b7ec22f75c51 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { +struct FunctionSchema; +} + +namespace at { +TORCH_API void launch(std::function func); +} + +namespace torch::jit { + +struct Graph; +struct Code; + +namespace mobile { +struct Code; +} + +using Stack = std::vector; +using Kwargs = std::unordered_map; +struct RecursiveMethodCallError : public std::exception {}; +using TaskLauncher = std::function)>; + +TORCH_API void preoptimizeGraph( + std::shared_ptr& graph, + bool disable_autocast = false); + +// A Function is a pure Graph with no implicit `self` object bound. +// It contains schema information and the executor that manages the +// execution of the function. Method is a wrapper around an +// underlying Function that also provides a `self` object. +struct TORCH_API Function { + Function() = default; + Function(const Function&) = default; + Function& operator=(const Function&) = default; + Function(Function&&) noexcept = default; + Function& operator=(Function&&) noexcept = default; + virtual std::string_view doc_string() const { + static constexpr std::string_view no_doc_string; + return no_doc_string; + } + + virtual bool isGraphFunction() const { + return false; + } + + virtual void run(Stack& stack) = 0; + + virtual c10::intrusive_ptr runAsync( + Stack& /*stack*/, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + [[maybe_unused]] TaskLauncher taskLauncher = at::launch) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); + return {}; + } + + at::IValue operator()(Stack stack, const Kwargs& kwargs = Kwargs()) { + getSchema().checkAndNormalizeInputs(stack, kwargs); + run(stack); + return stack.front(); + } + + virtual const c10::QualifiedName& qualname() const = 0; + + const std::string& name() const { + return qualname().name(); + } + + // if this isn't yet defined, run its method_creator function + virtual void ensure_defined() = 0; + + virtual const c10::FunctionSchema& getSchema() const = 0; + + virtual size_t num_inputs() const = 0; + + virtual Function& setSchema(c10::FunctionSchema schema) = 0; + + // call() defines how different interpreter implementations interacts with + // Function objects. Basically interpreters need to provide a callback to + // communicate to Functions what to do if provided a Code object. + // Alternatively we could design the signature to return an optional Code + // object, but that requires special handling the null case in interpreter + // and the fallback behavior is not well defined by interpreter but rather + // Function themselves, so a callback approach is more reasonable than + // returning values. + // If call() returns true, then callback completes successfully, otherwise + // call() returns false. + + // Overload for server interpreter, a bailout size is needed for graph + // executor. + virtual bool call( + Stack& /*unused*/, + std::optional /*unused*/, + c10::function_ref /*unused*/) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); + return false; + } + + // Overload for mobile interpreter. + virtual bool call(Stack& /*unused*/, c10::function_ref /*unused*/) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); + return false; + } + + virtual ~Function() = default; +}; +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function_schema.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function_schema.h new file mode 100644 index 0000000000000000000000000000000000000000..a9f8e0238cdf0aea822e26c2dcb8c39284326305 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function_schema.h @@ -0,0 +1,695 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// schema as used in the compiler for resolving function calls and reporting +// errors. These objects should be constructed from C10 schema once those +// are available. + +struct Argument; +struct FunctionSchema; + +using AliasTypeSet = std::vector; + +bool operator==(const Argument& lhs, const Argument& rhs); + +struct TORCH_API Argument { + Argument( + std::string name = "", + const TypePtr& type = nullptr, + std::optional N = std::nullopt, + std::optional default_value = std::nullopt, + bool kwarg_only = false, + std::optional alias_info = std::nullopt) + : Argument(std::move(name), type, type, N, std::move(default_value), kwarg_only, std::move(alias_info)) {} + + Argument( + std::string name, + TypePtr fake_type, + TypePtr real_type, + std::optional N = std::nullopt, + std::optional default_value = std::nullopt, + bool kwarg_only = false, + std::optional alias_info = std::nullopt) + : name_(std::move(name)), + type_(fake_type ? std::move(fake_type) : TensorType::get()), + real_type_(real_type ? std::move(real_type) : type_), + N_(N), + default_value_(std::move(default_value)), + alias_info_(alias_info ? std::make_unique(std::move(*alias_info)) : nullptr), + kwarg_only_(kwarg_only) { + // this is an softly-enforced invariant for out arguments. + bool is_alias = alias_info_ != nullptr && alias_info_->isWrite(); + is_out_ = kwarg_only_ && is_alias; + } + + Argument(Argument&& rhs) noexcept = default; + + Argument(const Argument& rhs) + : name_(rhs.name_), + type_(rhs.type_), + real_type_(rhs.real_type_), + N_(rhs.N_), + default_value_(rhs.default_value_), + alias_info_(rhs.alias_info_ ? std::make_unique(*rhs.alias_info_) : nullptr), + kwarg_only_(rhs.kwarg_only_), + is_out_(rhs.is_out_) {} + + Argument& operator=(Argument&& rhs) = default; + + Argument& operator=(const Argument& rhs) { + if (this != &rhs) { + name_ = rhs.name_; + type_ = rhs.type_; + real_type_ = rhs.real_type_; + N_ = rhs.N_; + default_value_ = rhs.default_value_; + alias_info_ = rhs.alias_info_ ? std::make_unique(*rhs.alias_info_) : nullptr; + kwarg_only_ = rhs.kwarg_only_; + is_out_ = rhs.is_out_; + } + return *this; + } + ~Argument() = default; + + const std::string& name() const { + return name_; + } + const TypePtr& type() const { + return type_; + } + // if type() is non-null, this is guaranteed to be non-null (if no real + // type was provided, this takes on type()'s value) + const TypePtr& real_type() const { + return real_type_; + } + const std::optional& N() const { + return N_; + } + const std::optional& default_value() const { + return default_value_; + } + bool kwarg_only() const { + return kwarg_only_; + } + + bool is_out() const { + return is_out_; + } + + [[nodiscard]] const AliasInfo* alias_info() const { + return alias_info_.get(); + } + + bool is_inferred_type() const { + bool is_inferred_type = false; + TORCH_INTERNAL_ASSERT(type_); + if (auto pt = type_->cast()) { + if (pt->isInferredType()) { + is_inferred_type = true; + } + } + return is_inferred_type; + } + + std::string formatTypeMismatchMsg(const std::string& actual_type) const { + std::string inferred_type_hint; + if (is_inferred_type()) { + inferred_type_hint = c10::str( + "Inferred '", + name(), + "' to be of type 'Tensor' ", + "because it was not annotated with an explicit type.\n"); + } + return c10::str( + "Expected a value of type '", + type()->repr_str(), + "' for argument '", + name(), + "' but instead found type '", + actual_type, + "'.\n", + inferred_type_hint); + } + + Argument cloneWithType(const TypePtr& new_type) const { + return Argument( + name_, + new_type, + N_, + default_value_, + kwarg_only_, + alias_info_ ? std::optional(*alias_info_) : std::nullopt); + } + + // this function checks whether this Argument is backward compatible with + // the old one. we consider the following cases are backward compatible: + // 1) two arguments are equal + // 2) this arg's type should be subtype of old + // 3) this arg must provide the same default value if old arg has one, + bool isBackwardCompatibleWith( + const Argument& old, + std::ostream* why_not=nullptr) const; + + // this function checks whether this Argument is forward compatible with + // the old one. we consider the following cases are forward compatible: + // 1) two arguments are equal + // 2) this arg's type should be subtype of old + // 3) this arg must provide the same default value if old arg has one, + bool isForwardCompatibleWith( + const Argument& old, + std::ostream* why_not = nullptr) const; + + private: + std::string name_; + TypePtr type_; + TypePtr real_type_; // this is ScalarType, not int, e.g. + // for list types, an optional statically known length for the list + // e.g. for int[3]: type = ListType::ofInts(), N = 3 + // If present, this will allow scalars to be broadcast to this length to + // become a list. + std::optional N_; + + std::optional default_value_; + // AliasInfo is huge, so let's only allocate memory for it if + // necessary (which it isn't during schema parsing on startup, to + // give a pertinent example). + std::unique_ptr alias_info_; + // is this only specifiable as a keyword argument? + bool kwarg_only_; + // marks if the argument is out variant of the schema + bool is_out_; +}; + +inline bool operator==(const Argument& lhs, const Argument& rhs) { + return lhs.name() == rhs.name() + && *lhs.type() == *rhs.type() + && lhs.N() == rhs.N() + && lhs.default_value() == rhs.default_value() + && lhs.kwarg_only() == rhs.kwarg_only() + && (lhs.alias_info() == rhs.alias_info() + || (lhs.alias_info() != nullptr && rhs.alias_info() != nullptr + && *lhs.alias_info() == *rhs.alias_info())); +} + +inline bool operator!=(const Argument& lhs, const Argument& rhs) { + return !(lhs == rhs); +} + +enum struct TORCH_API SchemaArgType { input, output }; + +/** + * struct SchemaArgument + * + * Structure used to represent arguments or returns for a schema. + */ +struct TORCH_API SchemaArgument { + SchemaArgType type; + size_t index; + SchemaArgument(SchemaArgType tpe, size_t idx) : type(tpe), index(idx) {} + bool operator==(const SchemaArgument& rhs) const { + return type == rhs.type && index == rhs.index; + } +}; + +bool operator==(const FunctionSchema& lhs, const FunctionSchema& rhs); + +struct TORCH_API FunctionSchema { + FunctionSchema( + std::string name, + std::string overload_name, + std::vector arguments, + std::vector returns, + bool is_vararg = false, + bool is_varret = false) + : name_({std::move(name), std::move(overload_name)}), + arguments_(std::move(arguments)), + returns_(std::move(returns)), + is_vararg_(is_vararg), + is_varret_(is_varret) { + checkSchema(); + } + + FunctionSchema( + Symbol name, + std::string overload_name, + std::vector arguments, + std::vector returns, + bool is_vararg = false, + bool is_varret = false) + : FunctionSchema( + name.toQualString(), + std::move(overload_name), + std::move(arguments), + std::move(returns), + is_vararg, + is_varret) { + checkSchema(); + } + + // Checks whether this schema is backward compatible with the old one. + // The following conditions must be true: + // [Function structure] The new schema's name, overload-name, varargs, and + // return arity are the same. + // [Output Narrowing] The new schema's output type must be the same class + // or inherit from the old schema's output type. + // [Argument count] The new schema must have at least as many arguments as + // the old schema (considering the list of positional and kwargs). + // [Arg Compatibility] Every argument in the old schema has a corresponding + // argument in the new schema that: + // * is at the same position. + // * has the same name. + // * is either positional, or kwarg and the old argument was kwarg. + // * has the same type, or the old argument's type inherits from the + // new argument's type. + // [Default Values] Every new argument must have a default value. + // E.g. + // OK f_new(a, b, c=1) => f_old(a, b) + // NOK f_new(a, c=1, *, b) => f_old(a, *, b) + // OK f_new(a, b, *, c) => f_old(a, *, b, c) + // NOK f_new(a, *, b, c) -> f_old(a, b, *, c) + // NOK f_new(a, *, c, b) => f_old(a, *, b, c) + // OK f_new(a, *, b, c, d=1) => f_old(a, *, b, c) + bool isBackwardCompatibleWith( + const FunctionSchema& old, + std::ostream* why_not = nullptr) const; + + // Checks whether this schema is forward compatible with the old one. + // The following conditions must be true: + // [Function structure] The new schema's name, overload-name, varargs, and + // return arity are the same. + // [Output Narrowing] The new schema's output type must be the same class + // or inherit from the old schema's output type. + // [Arg Compatibility] Every argument in the old schema has a corresponding + // argument in the new schema that: + // * is at the same position. + // * has the same name. + // * is either positional, or kwarg and the old argument was kwarg. + // * has the same type, or the old argument's type inherits from the + // new argument's type. + // [Default Values] Every new argument must have a default value. + // Each default value type should NOT be a container type. + // [Positioning] All defaults arguments MUST go after either old + // default arguments or the end of positional arguments + // and right BEFORE all out arguments + bool isForwardCompatibleWith( + const FunctionSchema& old, + std::ostringstream& why_not) const; + + private: + OperatorName name_; + std::vector arguments_; + std::vector returns_; + // if true then this schema takes an arbitrary number of additional arguments + // after the argument specified in arguments + // currently this is used primarily to represent 'primitive' operators whose + // arguments are not checked by schema + bool is_vararg_; + bool is_varret_; + + // if no alias information is directly specified, what kind of "default" + // alias information should we infer? + // NB: due to alias analysis kind merging, this may be nullopt. Eventually + // this should always be set no matter what + std::optional alias_kind_; + + template + void checkArg(const IValue& value, const Argument& argument, std::optional pos) const; + + void checkSchema() const { + bool seen_default_arg = false; + for (const auto& arg : arguments()) { + if (arg.default_value()) { + seen_default_arg = true; + } else { + // we have historically serialized broadcasting lists wo/default values, + // so to not break BC allow lists here + if (arg.type()->kind() == ListType::Kind) { + continue; + } + TORCH_INTERNAL_ASSERT( + !seen_default_arg || arg.kwarg_only(), + "Non-default positional argument follows default argument. Parameter ", + arg.name(), + " in ", + *this); + } + } + } + + public: + + void dump() const; + + const OperatorName& operator_name() const { + return name_; + } + const std::string& name() const { + return name_.name; + } + const std::string& overload_name() const { + return name_.overload_name; + } + const std::vector& arguments() const { + return arguments_; + } + const std::vector& returns() const { + return returns_; + } + bool is_vararg() const { + return is_vararg_; + } + bool is_varret() const { + return is_varret_; + } + bool is_aliasing(const c10::SchemaArgument &argument) const { + TORCH_INTERNAL_ASSERT( + argument.index < getCorrectList(argument.type).size(), + "Invalid index for schema."); + const AliasInfo* aliasInfo = getCorrectList(argument.type)[argument.index].alias_info(); + return aliasInfo; + } + bool is_mutable() const { + return std::any_of( + arguments_.cbegin(), arguments_.cend(), [](const Argument& arg) { + const AliasInfo* aliasInfo = arg.alias_info(); + return aliasInfo && aliasInfo->isWrite(); + }); + } + bool is_mutable(const c10::SchemaArgument &argument) const { + TORCH_INTERNAL_ASSERT( + argument.index < getCorrectList(argument.type).size(), + "Invalid index for schema."); + const AliasInfo* aliasInfo = getCorrectList(argument.type)[argument.index].alias_info(); + return aliasInfo && aliasInfo->isWrite(); + } + bool is_mutable(std::string_view name) const { + std::optional index = argumentIndexWithName(name); + TORCH_INTERNAL_ASSERT( + index.has_value(), "Schema has no argument named ", name); + + return is_mutable({c10::SchemaArgType::input, static_cast(*index)}); + } + + // Returns whether lhs and rhs may alias directly. + // This does not account for cases where lhs or rhs are a container that + // may contain elements that alias the other argument. + // FunctionSchema::may_contain_alias will include that functionality. + bool may_alias(const SchemaArgument& lhs, const SchemaArgument& rhs) const; + + // Returns whether lhs and rhs may alias directly or whether lhs/rhs are a container + // that may contain elements that alias the other argument. + // bidirectional = false only returns whether lhs may contain an alias of rhs + // while bidirectional = true returns both directions. + bool may_contain_alias(const SchemaArgument& lhs, const SchemaArgument& rhs, bool bidirectional = true) const; + + // Returns whether the two AliasTypeSets contain any similarities + // ie: whether the two type sets can alias. + bool canAliasTypeSetsAlias(const std::optional &lhs, const std::optional &rhs) const; + + // Recursively Finds all contained types within the AliasTypeSet. + std::optional getAliasTypeSetContainedTypes(const std::optional &aliasTypeSet) const; + + // Similar to mapTypeToAliasTypeSet defined in alias_analysis.cpp. + // Used to map types to a type such that all types that can alias will be mapped to the same type. + // For example, calling this method on 'Optional[List[int]]' is the same as calling this method + // on 'List[int]'. + std::optional mapTypeToAliasTypeSet(const TypePtr& type) const; + + // Returns either arguments() or returns() depending on the SchemaArgType + // output => returns(), input => arguments() + const std::vector& getCorrectList(SchemaArgType type) const; + + std::optional argumentIndexWithName(std::string_view name) const { + for (const auto i : c10::irange(arguments().size())) { + if(name == arguments()[i].name()) + return i; + } + return std::nullopt; + } + FunctionSchema cloneWithName(std::string name, std::string overload_name) const { + return FunctionSchema( + std::move(name), + std::move(overload_name), + arguments(), + returns(), + is_vararg(), + is_varret() + ); + } + FunctionSchema cloneWithArguments(std::vector new_arguments) const { + return FunctionSchema( + name(), + overload_name(), + std::move(new_arguments), + returns(), + is_vararg(), + is_varret()); + } + FunctionSchema cloneWithReturns(std::vector new_returns) const { + return FunctionSchema( + name(), + overload_name(), + arguments(), + std::move(new_returns), + is_vararg(), + is_varret()); + } + + std::string formatTypeMismatchMsg( + const Argument& expected, + const std::string& actual_type, + std::optional position = std::nullopt, + std::optional value = std::nullopt) const; + + FunctionSchema cloneWithRemappedTypes( + const std::function type_map) const; + + FunctionSchema cloneWithRealTypes(bool with_symint=true) const; + + // Check that inputs have the correct types and appends any missing default + // values. + template + void checkAndNormalizeInputs( + std::vector& inputs, + const std::unordered_map& kwargs = + std::unordered_map{}) const; + + std::string findErrorInKwargs(const std::vector& kwargs) const; + + bool hasAnyAliasInfo() const { + for (const auto& arg : arguments_) { + if (arg.alias_info() != nullptr) { + return true; + } + } + for (const auto& ret : returns_) { + if (ret.alias_info() != nullptr) { + return true; + } + } + return false; + } + + + // TODO remove the mutation here + bool isDefaultAliasAnalysisKind() const { + return !alias_kind_; + } + AliasAnalysisKind aliasAnalysis() const { + return alias_kind_.value_or(AliasAnalysisKind::CONSERVATIVE); + } + void setAliasAnalysis(AliasAnalysisKind v) { + alias_kind_ = v; + } + + std::optional getNamespace() const { + return name_.getNamespace(); + } + + // Returns true if we successfully set the namespace (as there + // was none set, and false otherwise) + bool setNamespaceIfNotSet(const char* ns) { + return name_.setNamespaceIfNotSet(ns); + } + + // can a function with this schema be substituted for a function of rhs's + // schema and have the program typecheck? + // as_method - if true, treat this schema as a method and ignore + // the first argument, which will be the object in both cases + bool isSubtypeOf(const FunctionSchema& rhs, bool as_method, std::ostream* why_not=nullptr) const; +}; + +inline bool operator==(const FunctionSchema& lhs, const FunctionSchema& rhs) { + return lhs.name() == rhs.name() + && lhs.overload_name() == rhs.overload_name() + && lhs.arguments() == rhs.arguments() + && lhs.returns() == rhs.returns() + && lhs.is_vararg() == rhs.is_vararg() + && lhs.is_varret() == rhs.is_varret(); +} + +inline bool operator!=(const FunctionSchema& lhs, const FunctionSchema& rhs) { + return !(lhs == rhs); +} + +// print out Argument, which is compatible with FunctionSchema parser +// full format: Type(alias)? name=default_value +inline std::ostream& operator<<(std::ostream& out, const Argument& arg) { + + // for adjusting the ? position. + // in schema, we have Tensor?(a!) input, and t(a!)?. + // however, t?(a!) doesn't work with schema parser. + // so we always use Type(alias)? format + // real_type versus fake_type: in order to be compatible with FunctionSchema + // parser, printing an argument with either MemoryFormat or Layout type should + // give us the original schema string, hence printing out real_type. + auto type = arg.real_type(); + bool is_opt = type->kind() == OptionalType::Kind; + auto unopt_type = is_opt ? type->castRaw()->getElementType() : type; + + if (unopt_type->kind() == ListType::Kind) { + // sized lists get size N from arg, not type + auto list = unopt_type->cast(); + out << list->getElementType()->str(); + if (arg.alias_info() && !arg.alias_info()->containedTypes().empty()){ + out << arg.alias_info()->containedTypes()[0]; + } + std::string N; + if (arg.N()) { + N = std::to_string(*arg.N()); + } + out << '[' << N << ']'; + } else { + out << unopt_type->str(); + } + + // print alias info if it has beforeSets. + if (arg.alias_info() && !arg.alias_info()->beforeSets().empty()) { + out << *arg.alias_info(); + } + + if (is_opt) { + out << '?'; + } + + if (!arg.name().empty()) { + out << ' ' << arg.name(); + } + + if (arg.default_value()) { + out << '='; + if ((type->kind() == c10::TypeKind::StringType || + unopt_type->kind() == c10::TypeKind::StringType) && + arg.default_value().value().isString()) { + printQuotedString(out, arg.default_value().value().toStringRef()); + } else if (type->kind() == TypeKind::ListType && type->castRaw()->getElementType()->kind() == c10::TypeKind::IntType) { + // We want to faithfully replicate JIT schema. + // in native_functions.yaml defaults for int arrays with a single value always look like + // int[2] stride=1 + // instead of + // int[2] stride=[1, 1] + auto default_val = arg.default_value().value().toIntList(); + if (default_val.size() > 1) { + auto all_defaults_the_same = true; + for (const auto i : c10::irange(1, default_val.size())) { + if (default_val[0] != default_val[i]) all_defaults_the_same = false; + } + if (all_defaults_the_same) { + out << default_val[0]; + } else { + out << arg.default_value().value(); + } + } else { + out << arg.default_value().value(); + } + } else { + out << arg.default_value().value(); + } + } + + return out; +} + +TORCH_API std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema); + +inline std::string toString(const FunctionSchema& schema) { + std::ostringstream str; + str << schema; + return str.str(); +} + +} // namespace c10 + +namespace std { +template<> + struct hash { + size_t operator()(const c10::SchemaArgument& arg) const + { + return c10::hash_combine(std::hash()(arg.index), std::hash()(static_cast(arg.type))); + } + }; +template<> + struct hash { + size_t operator()(const c10::Argument& arg) const + { + auto hash = std::hash{}(arg.name()); + auto type_hash = std::hash{}(arg.type()); + auto kwarg_only_hash = std::hash{}(arg.kwarg_only()); + hash = c10::hash_combine(hash, type_hash); + hash = c10::hash_combine(hash, kwarg_only_hash); + // hashing optional fields if they exist + if (arg.default_value().has_value()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + auto default_value_hash = c10::hash{}(*arg.default_value()); + hash = c10::hash_combine(hash, default_value_hash); + } + if (arg.N().has_value()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + auto N_hash = std::hash{}(*arg.N()); + hash = c10::hash_combine(hash, N_hash); + } + if (arg.alias_info()) { + auto alias_info_hash = std::hash{}(*arg.alias_info()); + hash = c10::hash_combine(hash, alias_info_hash); + } + return hash; + } + }; +template<> + struct hash { + size_t operator()(const c10::FunctionSchema& schema) const + { + auto hash = std::hash{}(schema.operator_name()); + auto args_hash = c10::hash>{}(schema.arguments()); + auto returns_hash = c10::hash>{}(schema.returns()); + auto is_vararg_hash = std::hash{}(schema.is_vararg()); + auto is_varret_hash = std::hash{}(schema.is_varret()); + hash = c10::hash_combine(hash, args_hash); + hash = c10::hash_combine(hash, returns_hash); + hash = c10::hash_combine(hash, is_vararg_hash); + hash = c10::hash_combine(hash, is_varret_hash); + return hash; + } + }; +} // namespace std + + +#include // IWYU pragma: keep + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function_schema_inl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function_schema_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..0e7bd33a69878dc610bafe19017c7c36c1139b84 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/function_schema_inl.h @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace c10 { + +template +inline void FunctionSchema::checkArg( + const IValue& value, + const Argument& argument, + std::optional pos) const { + if (value.isTensor() && argument.type() == TensorType::get()) { + // Fast-path for the common case + return; + } + if (value.isGenericDict() && value.toGenericDict().empty()) { + return; + } + if (!value.type()->isSubtypeOf(*argument.type())) { + TORCH_CHECK( + false, + formatTypeMismatchMsg( + argument, value.type()->repr_str(), pos)); + } +} + +template +inline void FunctionSchema::checkAndNormalizeInputs( + std::vector& inputs, + const std::unordered_map& kwargs) const { + // Do we have more inputs than the schema accepts? + TORCH_CHECK( + inputs.size() <= arguments().size(), + "Expected at most ", + arguments().size(), + " argument(s) for operator '", + name(), + "', but received ", + inputs.size(), + " argument(s). Declaration: ", + *this); + + size_t consumed_kwargs = 0; + for (const auto pos : c10::irange(arguments().size())) { + const auto& argument = arguments()[pos]; + if (pos < inputs.size()) { + checkArg(inputs[pos], argument, pos); + continue; + } + auto it = kwargs.find(argument.name()); + if (it != kwargs.end()) { + checkArg(it->second, argument, std::nullopt); + inputs.push_back(it->second); + consumed_kwargs++; + continue; + } + if (argument.default_value()) { + inputs.push_back(*argument.default_value()); + continue; + } + TORCH_CHECK(false, + name(), + "() is missing value for argument '", + argument.name(), + "'. Declaration: ", + *this); + } + if (consumed_kwargs != kwargs.size()) { + std::vector names; + names.reserve(kwargs.size()); + for(const auto& k : kwargs) { + names.emplace_back(k.first); + } + TORCH_CHECK(false, findErrorInKwargs(names)); + } +} + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/functional.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/functional.h new file mode 100644 index 0000000000000000000000000000000000000000..c5bf676563f2704d91659ed57b4d757331432279 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/functional.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10 { + +// The passed in function must take T by value (T), or by +// const reference (const T&); taking T by non-const reference +// will result in an error like: +// +// error: no type named 'type' in 'class std::invoke_result' +// +// No explicit template parameters are required. + +// Overload for explicit function and ArrayRef +template +inline auto fmap(const T& inputs, const F& fn) -> std::vector { + std::vector r; + r.reserve(inputs.size()); + for(const auto & input : inputs) + r.push_back(fn(input)); + return r; +} + +// C++ forbids taking an address of a constructor, so here's a workaround... +// Overload for constructor (R) application +template +inline std::vector fmap(const T& inputs) { + std::vector r; + r.reserve(inputs.size()); + for(auto & input : inputs) + r.push_back(R(input)); + return r; +} + +template +inline std::vector filter(at::ArrayRef inputs, const F& fn) { + std::vector r; + r.reserve(inputs.size()); + for(auto & input : inputs) { + if (fn(input)) { + r.push_back(input); + } + } + return r; +} + +template +inline std::vector filter(const std::vector& inputs, const F& fn) { + return filter(static_cast>(inputs), fn); +} + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/grad_mode.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/grad_mode.h new file mode 100644 index 0000000000000000000000000000000000000000..15b2c523a2f07e92eab466d3193577817752ed4b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/grad_mode.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { + using GradMode = c10::GradMode; + using AutoGradMode = c10::AutoGradMode; + using NoGradGuard = c10::NoGradGuard; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..ffe7388fe4a57d082b91906c0e629b3531be8784 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings.h @@ -0,0 +1,360 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace c10 { + +#define FORALL_NS_SYMBOLS(_) \ + _(namespaces, prim) \ + _(namespaces, prims) \ + _(namespaces, nvprims) \ + _(namespaces, aten) \ + _(namespaces, cuda) \ + _(namespaces, onnx) \ + _(namespaces, attr) \ + _(namespaces, scope) \ + _(namespaces, user) \ + _(namespaces, _caffe2) \ + _(namespaces, dimname) \ + _(namespaces, namespaces) \ + _(prim, Assign) \ + _(prim, BroadcastingChunk) \ + _(prim, BroadcastSizes) \ + _(prim, ReductionSizes) \ + _(prim, Constant) \ + _(prim, ChunkSizes) \ + _(prim, ConstantMKLDNNTensor) \ + _(prim, BroadcastMKLDNNTensors) \ + _(prim, MKLDNNGroup) \ + _(prim, MKLDNNHardSwish) \ + _(prim, MKLDNNHardSigmoid) \ + _(prim, MKLDNNHardTanh) \ + _(prim, MKLDNNClamp) \ + _(prim, StaticRuntimeCopyOuts) \ + _(prim, Drop) \ + _(prim, Eval) \ + _(prim, Expand) /* onnx */ \ + _(prim, FusionGroup) \ + _(prim, CudaFusionGroup) \ + _(prim, CudaFusionGuard) \ + _(prim, oneDNNFusionGroup) \ + _(prim, oneDNNFusionGuard) \ + _(prim, FunctionalGraph) \ + _(prim, add_optional) \ + _(prim, view_copy) \ + _(prim, permute_copy) \ + _(prim, reshape_copy) \ + _(prim, squeeze_copy) \ + _(prim, t_copy) \ + _(prim, transpose_copy) \ + _(prim, unsqueeze_copy) \ + _(prim, flatten_copy) \ + _(prim, expand_copy) \ + _(prim, expand_as_copy) \ + _(prim, DifferentiableGraph) \ + _(prim, TensorExprGroup) \ + _(prim, TensorExprDynamicGroup) \ + _(prim, StaticSubgraph) \ + _(prim, If) \ + _(prim, Jump) /* debug */ \ + _(prim, JumpNZ) /* debug */ \ + _(prim, JumpZ) /* debug */ \ + _(prim, Load) \ + _(prim, Loop) \ + _(prim, Param) \ + _(prim, PackPadded) /* onnx */ \ + _(prim, PadPacked) /* onnx */ \ + _(prim, Placeholder) /* debug */ \ + _(prim, Print) \ + _(prim, EmptyListLiteral) \ + _(prim, LegacyTypedConstructor) \ + _(prim, PythonOp) \ + _(prim, IgnoredPythonOp) \ + _(prim, Reverse) \ + _(prim, Return) \ + _(prim, ReturnStmt) \ + _(prim, BreakStmt) \ + _(prim, ContinueStmt) \ + _(prim, ComprehensionScope) \ + _(prim, Store) \ + _(prim, AutogradZero) \ + _(prim, AutogradAnyNonZero) \ + _(prim, AutogradAllNonZero) \ + _(prim, AutogradAllZero) \ + _(prim, Starred) \ + _(prim, TupleConstruct) \ + _(prim, TupleUnpack) \ + _(prim, TupleIndex) \ + _(prim, TupleSlice) \ + _(prim, ListConstruct) \ + _(prim, ListUnpack) \ + _(prim, DictConstruct) \ + _(prim, ModuleContainerIndex) \ + _(prim, EnumName) \ + _(prim, EnumValue) \ + _(prim, StringIndex) \ + _(prim, NumToTensor) \ + _(prim, Uninitialized) \ + _(prim, VarConcat) \ + _(prim, VarStack) \ + _(prim, With) \ + _(prim, Enter) \ + _(prim, Exit) \ + _(prim, IfThenElse) \ + _(aten, Bool) \ + _(aten, Int) \ + _(aten, FloatImplicit) \ + _(aten, ComplexImplicit) \ + _(aten, IntImplicit) \ + _(aten, ScalarImplicit) \ + _(aten, Float) \ + _(aten, Complex) \ + _(aten, str) \ + _(aten, Delete) \ + _(prim, device) \ + _(prim, dtype) \ + _(prim, layout) \ + _(prim, id) \ + _(prim, requires_grad) \ + _(prim, MakeTestTensor) /* test */ \ + _(prim, AutogradAdd) \ + _(prim, GradOf) \ + _(aten, grad) \ + _(aten, backward) \ + _(prim, Guard) \ + _(prim, BailOut) \ + _(prim, TypeCheck) \ + _(prim, RequiresGradCheck) \ + _(prim, FallbackGraph) \ + _(prim, FusedConcat) \ + _(prim, ConstantChunk) \ + _(prim, MMTreeReduce) \ + _(prim, MMBatchSide) \ + _(prim, list) \ + _(prim, dict) \ + _(prim, min) \ + _(prim, max) \ + _(prim, abs) \ + _(aten, divmod) \ + _(prim, zip) \ + _(prim, enumerate) \ + _(prim, range) \ + _(prim, rangelist) \ + _(prim, isinstance) \ + _(prim, tolist) \ + _(prim, unchecked_cast) \ + _(aten, _grad_sum_to_size) \ + _(aten, _size_if_not_equal) \ + _(aten, _ncf_unsqueeze) \ + _(aten, warn) \ + _(aten, sorted) \ + _(aten, floordiv) \ + _(aten, __range_length) \ + _(aten, __derive_index) \ + _(aten, __round_to_zero_floordiv) \ + _(aten, is_scripting) \ + _(aten, _unwrap_optional) \ + _(prim, fork) \ + _(prim, awaitable) \ + _(prim, forkClosure) \ + _(prim, awaitableClosure) \ + _(prim, awaitable_nowait) \ + _(prim, awaitable_wait) \ + _(prim, RaiseException) \ + _(prim, Closure) \ + _(prim, CreateObject) \ + _(prim, SetAttr) \ + _(prim, GetAttr) \ + _(prim, HasAttr) \ + _(prim, profile) \ + _(prim, profile_ivalue) \ + _(prim, AddStatValue) \ + _(prim, TimePoint) \ + _(prim, CallFunction) \ + _(prim, CallMethod) \ + _(prim, LoopContinuation) \ + _(prim, annotate) \ + _(prim, TracedModuleForward) \ + _(prim, TracedFork) \ + _(prim, TracedAttr) \ + _(prim, rpc_async) \ + _(prim, rpc_sync) \ + _(prim, rpc_remote) \ + _(prim, is_cuda) \ + _(aten, append) \ + _(aten, as_tensor) \ + _(aten, adaptive_avg_pool2d_backward) \ + _(aten, dim) \ + _(aten, format) \ + _(aten, percentFormat) \ + _(aten, __not__) \ + _(aten, __is__) \ + _(aten, __isnot__) \ + _(aten, _ger) \ + _(aten, __getitem__) \ + _(aten, _set_item) \ + _(aten, manual_seed) \ + _(aten, device) \ + _(aten, hash) \ + _(aten, len) \ + _(aten, list) \ + _(aten, dict) \ + _(aten, wait) \ + _(aten, save) \ + _(aten, keys) \ + _(aten, ord) \ + _(aten, chr) \ + _(aten, hex) \ + _(aten, oct) \ + _(aten, clear) \ + _(aten, setdefault) \ + _(aten, bin) \ + _(aten, pop) \ + _(aten, insert) \ + _(aten, tensor) \ + _(prim, unchecked_unwrap_optional) \ + _(aten, __contains__) \ + _(prim, BailoutTemplate) \ + _(prim, grad) \ + _(cuda, _set_device) \ + _(cuda, set_stream) \ + _(cuda, _current_device) \ + _(cuda, synchronize) \ + _(aten, has_torch_function) \ + _(aten, is_autocast_enabled) \ + _(aten, is_autocast_cpu_enabled) \ + _(aten, is_autocast_xla_enabled) \ + _(aten, get_autocast_dtype) \ + _(aten, is_autocast_mps_enabled) \ + FORALL_ATEN_BASE_SYMBOLS(_) \ + _(onnx, Add) \ + _(onnx, Concat) \ + _(onnx, Constant) \ + _(onnx, ConstantFill) \ + _(onnx, Div) \ + _(onnx, GRU) \ + _(onnx, Gather) \ + _(onnx, Gemm) \ + _(onnx, LSTM) \ + _(onnx, MatMul) \ + _(onnx, Min) \ + _(onnx, Max) \ + _(onnx, Mul) \ + _(onnx, Pow) \ + _(onnx, RNN) \ + _(onnx, Shape) \ + _(onnx, Size) \ + _(onnx, Slice) \ + _(onnx, Softmax) \ + _(onnx, Squeeze) \ + _(onnx, Sub) \ + _(onnx, Transpose) \ + _(onnx, Unsqueeze) \ + _(onnx, Loop) \ + _(onnx, If) \ + _(onnx, Reshape) \ + _(onnx, Expand) \ + _(onnx, Equal) \ + _(onnx, Greater) \ + _(onnx, GreaterOrEqual) \ + _(onnx, Less) \ + _(onnx, LessOrEqual) \ + _(onnx, Not) \ + _(aten, ATen) \ + _(onnx, Split) \ + _(onnx, ConstantOfShape) \ + _(onnx, Cast) \ + _(onnx, Mod) \ + _(onnx, Sqrt) \ + _(onnx, SplitToSequence) \ + _(onnx, SequenceAt) \ + _(onnx, SequenceConstruct) \ + _(onnx, SequenceEmpty) \ + _(onnx, SequenceInsert) \ + _(onnx, SequenceErase) \ + _(onnx, ConcatFromSequence) \ + _(onnx, Identity) \ + _(onnx, SoftmaxCrossEntropyLoss) \ + _(onnx, NegativeLogLikelihoodLoss) \ + _(onnx, LogSoftmax) \ + _(onnx, ReduceL1) \ + _(onnx, ReduceL2) \ + _(onnx, Conv) \ + _(onnx, BatchNormalization) \ + _(onnx, ReduceMean) \ + _(onnx, ReduceProd) \ + _(onnx, Relu) \ + _(onnx, Neg) \ + _(onnx, NonZero) \ + _(onnx, Range) \ + _(onnx, Tile) \ + _(onnx, Where) \ + _(onnx, Optional) \ + _(onnx, OptionalGetElement) \ + _(onnx, OptionalHasElement) \ + FORALL_ATTR_BASE_SYMBOLS(_) \ + _(attr, Subgraph) \ + _(attr, ReverseSubgraph) \ + _(attr, f_real_outputs) \ + _(attr, df_input_vjps) \ + _(attr, df_input_captured_inputs) \ + _(attr, df_input_captured_outputs) \ + _(attr, df_output_vjps) \ + _(attr, axes) \ + _(attr, symbolic_shape_inputs) \ + _(attr, allow_stack_outputs) \ + _(attr, striding_inputs_desc) \ + _(attr, striding_outputs_desc) \ + _(attr, broadcast) \ + _(attr, direction) \ + _(attr, ends) \ + _(attr, inplace) \ + _(attr, input_as_shape) \ + _(attr, is_zero) \ + _(attr, num_none) \ + _(attr, num_present) \ + _(attr, perm) \ + _(attr, starts) \ + _(attr, profiled_type) \ + _(attr, transA) \ + _(attr, transB) \ + _(attr, name) \ + _(attr, module) \ + _(attr, beg) \ + _(attr, idx) \ + _(attr, split) \ + _(attr, slot) \ + _(attr, kinds) \ + _(attr, types) \ + _(attr, scope) \ + _(attr, keepdims) \ + _(attr, cache_id) \ + _(attr, new_axis) \ + _(attr, warn_id) \ + _(attr, output_layouts) \ + _(attr, allowzero) \ + _(attr, seen_none) \ + _(attr, overload_name) \ + _(attr, node_stack_idx) + +enum class _keys : unique_t { + #define DEFINE_KEY(ns, s) ns##_##s, + FORALL_NS_SYMBOLS(DEFINE_KEY) + #undef DEFINE_KEY + num_symbols +}; + +#define DEFINE_SYMBOL(ns, s) \ + namespace ns { constexpr Symbol s(static_cast(_keys::ns##_##s)); } +FORALL_NS_SYMBOLS(DEFINE_SYMBOL) +#undef DEFINE_SYMBOL + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings_class.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings_class.h new file mode 100644 index 0000000000000000000000000000000000000000..ee490997967e4ae139de41a89ee511bbb95e0d04 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings_class.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include +#include +#include +#include + +namespace c10 { + +struct TORCH_API InternedStrings { + InternedStrings(); + Symbol symbol(const std::string& s); + std::pair string(Symbol sym); + Symbol ns(Symbol sym); + + private: + // prereq - holding mutex_ + Symbol _symbol(const std::string& s); + std::pair customString(Symbol sym); + std::unordered_map string_to_sym_; + + struct SymbolInfo { + Symbol ns; + std::string qual_name; + std::string unqual_name; + }; + std::vector sym_to_info_; + + std::mutex mutex_; +}; + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue.h new file mode 100644 index 0000000000000000000000000000000000000000..1c0d3221f4b1a7afff964ab6e37472bb8131fd2e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue.h @@ -0,0 +1,1642 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-default") + +namespace torch { +class TORCH_API CustomClassHolder : public c10::intrusive_ptr_target {}; +namespace jit { +using ::torch::CustomClassHolder; +struct Function; +struct CompilationUnit; +struct Module; +} // namespace jit +} // namespace torch +namespace c10 { +template +class Dict; +template +class List; +template +class IListRef; +struct IValue; +struct ClassType; +struct Type; +class RRefInterface; + +struct ClassType; +using ClassTypePtr = std::shared_ptr; + +TORCH_API bool _fastEqualsForContainer(const IValue& lhs, const IValue& rhs); + +TORCH_API torch::jit::Function* checkObjectSortSchema( + const c10::ClassTypePtr& t, + std::stringstream& why_not); + +// A comparator that checks ordering of two IValues of same type. +typedef std::function IValueComparator; + +TORCH_API IValueComparator getLessThanComparator(const IValue& v); +TORCH_API IValueComparator getGreaterThanComparator(const IValue& v); + +namespace ivalue { +struct Tuple; +struct Future; +struct Await; +struct ConstantString; +struct GenericDict; +struct Object; +struct PyObjectHolder; +struct EnumHolder; +// We need a ComplexHolder because currently the payloads in the Union +// only take 64 bits. Since ComplexDouble takes up 128 bits, and is too big +// to fit in the IValue directly, we indirect complex numbers through an +// intrusive pointer to ComplexHolder (which contains a c10::complex). +struct ComplexHolder : c10::intrusive_ptr_target { + public: + template + ComplexHolder(c10::complex c) { + val = convert>(c); + } + ComplexHolder() = default; + c10::complex val; +}; + +// Similar to ComplexHolder, for StreamData3 +struct StreamData3Holder : c10::intrusive_ptr_target { + public: + StreamData3Holder(struct c10::StreamData3 d) : val(d) {} + StreamData3Holder() = delete; + struct c10::StreamData3 val; +}; + +} // namespace ivalue + +// This is an owning wrapper for a std::optional> +// that can be implicitly converted to a (non-owning) std::optional>. +// Its purpose is to be used in generated code to keep the vector alive +// either until the end of a statement (as a temporary), or as a saved arg +// in autograd. +template +struct OptionalArray { + std::optional> list; + + OptionalArray() = default; + OptionalArray(std::vector val) : list(std::move(val)) {} + + // Used when saving an argument for the backwards pass. + OptionalArray& operator=(std::optional> ref) { + if (ref) { + list = std::vector(ref->begin(), ref->end()); + } else { + list = std::nullopt; + } + return *this; + } + + // Used when saving an argument for the backwards pass. + OptionalArray& operator=(c10::OptionalArrayRef ref) { + if (ref) { + list = std::vector(ref->begin(), ref->end()); + } else { + list = std::nullopt; + } + return *this; + } + + operator std::optional>() { + if (!list) { + return std::nullopt; + } + return *list; + } + + operator c10::OptionalArrayRef() { + if (!list) { + return std::nullopt; + } + return *list; + } +}; + +// Capsule is an internal implementation detail of custom C++ classes. We +// define it as an owning wrapper for +// c10::intrusive_ptr This wrapper is here to serve as +// an abstraction of the type erased custom class object pointer. It also allow +// pybind11 to treat this as a standalone class to register as a separate type +// caster, instead of a custom pointer holder which the pointer holder type +// caster try to "unwrap" it automatically. +struct Capsule { + c10::intrusive_ptr obj_ptr; + explicit Capsule(c10::intrusive_ptr ptr) + : obj_ptr(std::move(ptr)) {} +}; + +// IValue is the generic tagged union used by the interpreter to hold +// all value types. +// It is a 16-byte object with an 8-byte payload and an 8-byte tag. +// The tag is currently 4 bytes to determine the type, and 1 byte +// to mark whether that type is a subtype of c10::intrusive_ptr_target and needs +// retain/release calls. + +#define TORCH_FORALL_TAGS(_) \ + _(None) \ + _(Tensor) \ + _(Storage) \ + _(Double) \ + _(ComplexDouble) \ + _(Int) \ + _(UInt) \ + _(SymInt) \ + _(SymFloat) \ + _(SymBool) \ + _(Bool) \ + _(Tuple) \ + _(String) \ + _(Blob) \ + _(GenericList) \ + _(GenericDict) \ + _(Future) \ + _(Await) \ + _(Device) \ + _(Stream) \ + _(Object) \ + _(PyObject) \ + _(Uninitialized) \ + _(Capsule) \ + _(RRef) \ + _(Quantizer) \ + _(Generator) \ + _(Enum) + +// [doxygen private] +// These methods are not actually private but we don't want to document them, so +// they are marked `@private`, which hides them on the doxygen documentation for +// this page. + +/// IValue (Interpreter Value) is a tagged union over the types +/// supported by the TorchScript interpreter. IValues contain their +/// values as an `IValue::Payload`, which holds primitive types +/// (`int64_t`, `bool`, `double`, `Device`) and `Tensor` as values, +/// and all other types as a `c10::intrusive_ptr`. In order to +/// optimize performance of the destructor and related operations by +/// making the `Tensor` and `c10::intrusive_ptr` paths generate the +/// same code, we represent a null `c10::intrusive_ptr` as +/// `UndefinedTensorImpl::singleton()`, *not* `nullptr`. +/// +/// IValues are used as inputs to and outputs from the TorchScript interpreter. +/// To retrieve the value contained within an IValue, use the `.toX()` methods, +/// where `X` is the type you are trying to get. Note that neither the `.toX()` +/// methods nor the templated `.to` functions do any kind of casting, they +/// only unwrap the contained value. For example: +/// +/// \rst +/// .. code-block:: cpp +/// +/// // Make the IValue +/// torch::IValue my_ivalue(26); +/// std::cout << my_ivalue << "\n"; +/// +/// // Unwrap the IValue +/// int64_t my_int = my_ivalue.toInt(); +/// std::cout << my_int << "\n"; +/// +/// // This will throw an error! +/// // `my_ivalue` is tagged as an int and cannot be used as another type +/// torch::Tensor my_tensor = my_ivalue.toTensor(); +/// \endrst +struct TORCH_API IValue final { + IValue(const IValue& rhs) : IValue(rhs.payload, rhs.tag) { + if (isIntrusivePtr() && + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::intrusive_ptr::incref(payload.u.as_intrusive_ptr); + } + } + + IValue(IValue&& rhs) noexcept : tag(rhs.tag) { + moveFrom(std::move(rhs)); + } + + /// @private [doxygen private] + ~IValue() { + destroy(); + } + + C10_ALWAYS_INLINE IValue& operator=(IValue&& rhs) & noexcept { + if (&rhs == this) { + return *this; + } + + destroy(); + moveFrom(std::move(rhs)); + return *this; + } + + IValue& operator=(IValue const& rhs) & { + *this = IValue(rhs); + return *this; + } + + void dump() const; + + /** + * Equality comparison. The semantics are the same as Python's `==`: + * 1. Numerical types are compared by value. + * 2. Tensors compute element-wise equality, returning a BoolTensor (see: + * `torch.eq()`) + * 3. Strings are compared by value. + * 4. Sequence types (list, tuple) are compared lexicographically by + * comparing their elements. Different sequence types never compare equal. + * 5. Mappings (dict) must have equal (key, value) pairs. + * 6. If not listed above, the default behavior for is to test identity + * equality (e.g. pointer equality). + * + * Why does this return an IValue instead of a bool? Because in PyTorch, + * `tensor1 == tensor2` returns a `BoolTensor`, not a bool. + * + * NOTE: we (like Python) assume that identity equality implies value equality + * for efficiency. + * TODO: need to support customizing equality + */ + IValue equals(const IValue& rhs) const; + /** + * This implements the same semantics as `bool(lhs == rhs)` in Python. which + * is the same as `equals()` except for Tensor types. + */ + TORCH_API friend bool operator==(const IValue& lhs, const IValue& rhs); + TORCH_API friend bool operator!=(const IValue& lhs, const IValue& rhs); + + /** + * Identity comparison. Checks if `this` is the same object as `rhs`. The + * semantics are the same as Python's `is` operator. + * + * NOTE: Like in Python, this operation is poorly defined for primitive types + * like numbers and strings. Prefer to use `==` unless you really want to + * check identity equality. + */ + bool is(const IValue& rhs) const; + + /** + * Hashing for IValues. Returns an IValue-boxed int. + * + * Some notes: + * - Like eager, Tensors are hashed by looking at the pointer. This is not + * strictly correct because two value-equal tensors with different tensor + * pointers will hash differently, but we choose to reproduce the eager + * semantics. + * - Hashing is not defined on all built-in IValue types (e.g. list and + * dict), following Python. Calling `hash()` on these types will throw. + */ + IValue hash() const { + return (int64_t)IValue::hash(*this); + } + // This is defined because `c10::hash` dispatches to a function of this + // signature. See the member function `hash()`. + static size_t hash(const IValue& iv); + + /** + * @private [doxygen private] + * [container equality] + * This is an equality implementation that assumes objects with the same + * identity equal themselves, for efficiency reasons. We primarily have this + * for consistency, because Python does the same thing. This actually + * provokes user-visible changes in behavior due to quirks in torch: + * [tensor1] == [tensor1] -> True (because container equality will first + * compare identity) [tensor1] == [tensor1_copy] -> RuntimeError: + * Boolean value of Tensor with more than one value is ambiguous + */ + TORCH_API friend bool _fastEqualsForContainer( + const IValue& lhs, + const IValue& rhs); + + private: + static bool isAliasOf(const at::Tensor& a, const at::Tensor& b) { + if (a.is_sparse()) { + return isAliasOf(a._values(), b) || isAliasOf(a._indices(), b); + } + if (b.is_sparse()) { + return isAliasOf(a, b._values()) || isAliasOf(a, b._indices()); + } + if (a.is_sparse_csr()) { + return isAliasOf(a.values(), b) || isAliasOf(a.crow_indices(), b) || + isAliasOf(a.col_indices(), b); + } + if (b.is_sparse_csr()) { + return isAliasOf(a, b.values()) || isAliasOf(a, b.crow_indices()) || + isAliasOf(a, b.col_indices()); + } + + // Opaque tensors such as the ones constructed by the MKL-DNN backend + // don't have storage so we just compare their TensorImpls. + // TODO: Find way to expose alias info for opaque tensors. + if (!a.has_storage() || !b.has_storage()) { + return a.unsafeGetTensorImpl() == b.unsafeGetTensorImpl(); + } + + return a.is_alias_of(b); + } + + template + bool isListOf() const; + + public: + /// @private [doxygen private] + bool isAliasOf(const IValue& rhs) const { + if (this->tag != rhs.tag) { + // Trivially don't alias if the type is different + return false; + } + + // Tensors should be compared based on internal storage + if (this->isTensor()) { + return isAliasOf(this->toTensor(), rhs.toTensor()); + } + + if (!isIntrusivePtr()) { + // Primitive types don't alias anything + return false; + } + + AT_ASSERT(rhs.isIntrusivePtr()); + + // Other types can be compared by their ptr value + return this->payload.u.as_intrusive_ptr == rhs.payload.u.as_intrusive_ptr; + } + + /// @private [doxygen private] + size_t use_count() const noexcept { + if (isTensor()) { + return payload.as_tensor.use_count(); + } + + if (!isIntrusivePtrLegacyBehavior()) { + return 1; + } + + if (payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) { + return 0; + } + return c10::raw::intrusive_ptr::use_count(payload.u.as_intrusive_ptr); + } + + /// @private [doxygen private] + void swap(IValue& rhs) noexcept { + if (isTensor() && rhs.isTensor()) { + std::swap(payload.as_tensor, rhs.payload.as_tensor); + } else if (isTensor()) { + at::Tensor t = std::move(payload.as_tensor); + // As far as I can tell, omitting the usual explicit destructor call + // is not UB in and of itself, and it's a slight perf win. The + // destructor is a no-op, because the moved-from Tensor is + // effectively an intrusive_ptr in the null state, so we don't need + // the behavior for correctness reasons either. Leaving this + // explanatory comment, including commented-out destructor call, to + // make this abundantly clear. + // + // payload.as_tensor.~Tensor(); + payload.u = rhs.payload.u; + new (&rhs.payload.as_tensor) at::Tensor(std::move(t)); + } else if (rhs.isTensor()) { + rhs.swap(*this); + return; + } else { + std::swap(payload.u, rhs.payload.u); + } + std::swap(tag, rhs.tag); + } + + // Accessors for subtypes are arranged together below + // While some of these accessors could be generated through templates, + // we prefer to write them manually for clarity + + IValue(at::TensorBase t) : tag(Tag::Tensor) { + new (&payload.as_tensor) at::Tensor(std::move(t)); + } + bool isTensor() const { + return Tag::Tensor == tag; + } + + private: + // Outlined error path so that toTensor() can be inlined. + [[noreturn]] void reportToTensorTypeError() const; + + public: + at::Tensor toTensor() &&; + at::Tensor& toTensor() &; + const at::Tensor& toTensor() const&; + at::TensorImpl* unsafeToTensorImpl() const { + TORCH_INTERNAL_ASSERT(isTensor()); + return payload.as_tensor.unsafeGetTensorImpl(); + } + + IValue(at::Storage s) : tag(Tag::Storage) { + payload.u.as_intrusive_ptr = + null_to_undefined_tensor(s.unsafeReleaseStorageImpl()); + } + bool isStorage() const { + return Tag::Storage == tag; + } + c10::Storage toStorage() &&; + c10::Storage toStorage() const&; + + const IValue& toIValue() const { + return *this; + } + IValue& toIValue() { + return *this; + } + + /// @private [doxygen private] + IValue(intrusive_ptr blob) : tag(Tag::Blob) { + // TODO (after Tensor merge) If we pass in a Blob holding a Tensor, extract + // and store it as a Tensor instead. + payload.u.as_intrusive_ptr = null_to_undefined_tensor(blob.release()); + } + + /// @private [doxygen private] + bool isBlob() const { + return Tag::Blob == tag; + } + + /// @private [doxygen private] + c10::intrusive_ptr toBlob() &&; + + /// @private [doxygen private] + c10::intrusive_ptr toBlob() const&; + + // Capsule. No new callsites of these APIs should + // be introduced. + static inline IValue make_capsule( + intrusive_ptr blob); + bool isCapsule() const { + return Tag::Capsule == tag; + } + c10::intrusive_ptr toCapsule() &&; + c10::intrusive_ptr toCapsule() const&; + + // Custom C++ classes + template < + typename T, + std::enable_if_t, int> = 0> + IValue(intrusive_ptr custom_class); + bool isCustomClass() const; + template + c10::intrusive_ptr toCustomClass() &&; + template + c10::intrusive_ptr toCustomClass() const&; + + // Tuple + IValue(c10::intrusive_ptr v); + + template < + typename... Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t> = nullptr> + IValue(const std::tuple& t); + template < + typename... Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t> = nullptr> + IValue(std::tuple&& t); + bool isTuple() const { + return Tag::Tuple == tag; + } + c10::intrusive_ptr toTuple() &&; + c10::intrusive_ptr toTuple() const&; + [[nodiscard]] ivalue::Tuple& toTupleRef() const; + + // Double + IValue(double d) : tag(Tag::Double) { + payload.u.as_double = d; + } + bool isDouble() const { + return Tag::Double == tag; + } + double toDouble() const { + if (isDouble()) { + return payload.u.as_double; + } else if (isSymFloat()) { + return toSymFloat().guard_float(__FILE__, __LINE__); + } else { + TORCH_INTERNAL_ASSERT(0, "expected double"); + } + } + + // ComplexDouble + template + IValue(c10::complex c); + bool isComplexDouble() const { + return Tag::ComplexDouble == tag; + } + c10::complex toComplexDouble() const; + + // Future + IValue(c10::intrusive_ptr v); + bool isFuture() const { + return Tag::Future == tag; + } + c10::intrusive_ptr toFuture() &&; + c10::intrusive_ptr toFuture() const&; + + IValue(c10::intrusive_ptr v); + bool isAwait() const { + return Tag::Await == tag; + } + c10::intrusive_ptr toAwait() &&; + c10::intrusive_ptr toAwait() const&; + + // RRef + IValue(c10::intrusive_ptr v); + bool isRRef() const { + return Tag::RRef == tag; + } + c10::intrusive_ptr toRRef() &&; + c10::intrusive_ptr toRRef() const&; + + // Quantizer + IValue(c10::intrusive_ptr v); + bool isQuantizer() const { + return Tag::Quantizer == tag; + } + c10::intrusive_ptr toQuantizer() &&; + c10::intrusive_ptr toQuantizer() const&; + + // Int + IValue(int64_t i) : tag(Tag::Int) { + payload.u.as_int = i; + } + + IValue(const c10::SymInt& i) { + if (auto mi = i.maybe_as_int()) { + tag = Tag::Int; + payload.u.as_int = *mi; + } else { + tag = Tag::SymInt; + payload.u.as_intrusive_ptr = i.toSymNode().release(); + } + } + + bool isSymInt() const { + return Tag::SymInt == tag; + } + + c10::SymInt toSymInt() &&; + c10::SymInt toSymInt() const&; + + IValue(const c10::SymFloat& i) { + if (i.is_symbolic()) { + tag = Tag::SymFloat; + payload.u.as_intrusive_ptr = i.toSymNodeImpl().release(); + } else { + tag = Tag::Double; + payload.u.as_double = i.as_float_unchecked(); + } + } + + bool isSymFloat() const { + return Tag::SymFloat == tag; + } + + c10::SymFloat toSymFloat() &&; + c10::SymFloat toSymFloat() const&; + + IValue(const c10::SymBool& i) { + if (auto mi = i.maybe_as_bool()) { + tag = Tag::Bool; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + payload.u.as_int = *mi; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* due to byteorder if value assigned as_int, as_bool actually is not set correctly */ + payload.u.as_bool = *mi; +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif + } else { + tag = Tag::SymBool; + payload.u.as_intrusive_ptr = i.toSymNodeImpl().release(); + } + } + + bool isSymBool() const { + return Tag::SymBool == tag; + } + + c10::SymBool toSymBool() &&; + c10::SymBool toSymBool() const&; + + // allow you to pass literals (3, 4) without ambiguity + IValue(int32_t i) : IValue(static_cast(i)) {} + + bool isInt() const { + return Tag::Int == tag; + } + + int64_t toInt() const { + if (isInt()) { + return payload.u.as_int; + } else if (isSymInt()) { + return toSymInt().guard_int(__FILE__, __LINE__); + } else { + TORCH_INTERNAL_ASSERT(0, "expected int"); + } + } + + // Unsigned + IValue(uint64_t u) : tag( u <= std::numeric_limits::max() ? Tag::Int : Tag::UInt) { + payload.u.as_uint = u; + } + + + // See Note [Meaning of HAS_u] + // IValue type model closely follows that of c10::Scalar + // Where all integers are upcast to 64-bit representation, and `as_int` is used as default + // representation unless value could not be represented as signed int + bool isUnsigned() const { + return Tag::UInt == tag || (Tag::Int == tag && payload.u.as_int >= 0); + } + + uint64_t toUInt() const { + if (isUnsigned()) { + return payload.u.as_uint; + } else { + TORCH_INTERNAL_ASSERT(0, "expected unsigned int"); + } + } + + + // Bool + IValue(bool b) : tag(Tag::Bool) { +#if defined(__clang__) && defined(__x86_64__) + // Initializing entire payload stops valgrind's from reporting + // "jump or move depends on uninitialised value" in IValue copy constructor + // See https://github.com/pytorch/pytorch/issues/37117 + payload.u.as_int = b; +#else + payload.u.as_bool = b; +#endif + } + bool isBool() const { + return Tag::Bool == tag; + } + bool toBool() const { + if (isBool()) { + return payload.u.as_bool; + } else if (isSymBool()) { + return toSymBool().guard_bool(__FILE__, __LINE__); + } else { + TORCH_INTERNAL_ASSERT(0, "expected bool"); + } + } + + // IntList + bool isIntList() const; + bool isSymIntList() const; + c10::List toIntList() &&; + c10::List toIntList() const&; + std::vector toIntVector() const; + c10::List toSymIntList() &&; + c10::List toSymIntList() const&; + std::vector toSymIntVector() const; + at::DimVector toDimVector() const; + + // ConstantString + IValue(c10::intrusive_ptr v); + IValue(std::string v); + IValue(const char* v) : IValue(std::string(v)) {} + IValue(std::string_view v) : IValue(std::string(v)){} + bool isString() const { + return Tag::String == tag; + } + c10::intrusive_ptr toString() &&; + c10::intrusive_ptr toString() const&; + const std::string& toStringRef() const; + std::optional> toOptionalStringRef() + const; + std::string_view toStringView() const; + + // DoubleList + bool isDoubleList() const; + c10::List toDoubleList() &&; + c10::List toDoubleList() const&; + std::vector toDoubleVector() const; + + // ComplexDoubleList + bool isComplexDoubleList() const; + c10::List> toComplexDoubleList() &&; + c10::List> toComplexDoubleList() const&; + std::vector> toComplexDoubleVector() const; + + // BoolList + bool isBoolList() const; + c10::List toBoolList() &&; + c10::List toBoolList() const&; + + // TensorList + bool isTensorList() const; + c10::List toTensorList() &&; + c10::List toTensorList() const&; + std::vector toTensorVector() const; + + // OptionalTensorList + bool isOptionalTensorList() const; + c10::List> toOptionalTensorList() &&; + c10::List> toOptionalTensorList() const&; + std::vector> toOptionalTensorVector() const; + + // GenericList + IValue(c10::List v); + bool isList() const { + return Tag::GenericList == tag; + } + c10::List toList() &&; + c10::List toList() const&; + c10::ArrayRef toListRef() const; + + // Some template constructors of IValue calls another constructor recursively. + // This SFINAEs the called constructor exists. + template + using enable_if_ivalue_constructible = + std::enable_if_t, std::nullptr_t>; + + // The rule for lists is more complicated; the generic constructor is only + // acceptable if your element isn't SymInt. If you do have a SymInt element, + // then you must also, at construction time, check if you can decay the list + // into an int list (this is MANDATORY, as at a use site we may expect + // toIntList to work even if at the call site you had a SymIntArrayRef + // argument). In practice, only SymIntArrayRef is used this way, so we + // didn't bother making it work for the other constructors, we just make sure + // they're not selectable. + template + using enable_if_list_is_ivalue_constructible = std::enable_if_t< + std::is_constructible_v && !std::is_same_v, + std::nullptr_t>; + + template = nullptr> + IValue(c10::List&& v); + template = nullptr> + IValue(const c10::List& v); + template = nullptr> + IValue(at::ArrayRef v); + template = nullptr> + IValue(const std::vector& v); + template = nullptr> + IValue(std::vector&& v); + template + IValue(std::array v); + + // Manual constructors for lists of symints, which decay to int list if + // possible. To avoid ambiguous overload situations, we template them + // to prevent implicit conversions + template + using enable_if_symint = + std::enable_if_t, std::nullptr_t>; + + template = nullptr> + IValue(at::ArrayRef v); + template = nullptr> + IValue(at::OptionalArrayRef v); + template = nullptr> + IValue(const std::vector& v); + template = nullptr> + IValue(std::vector&& v); + + template + using enable_if_ilist_is_ivalue_constructible = std::enable_if_t< + std::is_constructible_v && + std::is_constructible_v::boxed_type> && + !std::is_same_v, + std::nullptr_t>; + + template = nullptr> + IValue(c10::IListRef v); + + // GenericDict + IValue(c10::Dict v); + bool isGenericDict() const { + return Tag::GenericDict == tag; + } + c10::Dict toGenericDict() &&; + c10::Dict toGenericDict() const&; + + template + IValue(c10::Dict v); + + template + /// \cond + /// DOXYGEN_CANNOT_HANDLE_CONSTRUCTORS_WITH_MACROS_SO_EXCLUDE_THIS_LINE_FROM_DOXYGEN + C10_DEPRECATED_MESSAGE( + "IValues based on std::unordered_map are slow and deprecated. Please use c10::Dict instead.") + /// \endcond + IValue(std::unordered_map v); + + template = nullptr> + IValue(std::optional v); + template = nullptr> + IValue(c10::OptionalArrayRef v); + IValue(std::nullopt_t /*unused*/); + + // ClassType + IValue(c10::intrusive_ptr v); + bool isObject() const { + return tag == Tag::Object; + } + c10::intrusive_ptr toObject() &&; + c10::intrusive_ptr toObject() const&; + ivalue::Object& toObjectRef() const; + + torch::jit::Module toModule() const; + bool isModule() const; + + // PyObject + IValue(c10::intrusive_ptr v); + bool isPyObject() const { + return tag == Tag::PyObject; + } + c10::intrusive_ptr toPyObjectHolder() &&; + c10::intrusive_ptr toPyObjectHolder() const&; + PyObject* toPyObject() const; + + // Enum + explicit IValue(c10::intrusive_ptr v); + bool isEnum() const { + return tag == Tag::Enum; + } + c10::intrusive_ptr toEnumHolder() &&; + c10::intrusive_ptr toEnumHolder() const&; + + // None + IValue() = default; + bool isNone() const { + return Tag::None == tag; + } + std::string toNone() const { + AT_ASSERT(isNone()); + return "None"; + } + + static IValue uninitialized() { + auto i = IValue(); + i.tag = Tag::Uninitialized; + return i; + } + + // Scalar, which gets encoded as either an Int, a Double or a ComplexDouble + IValue(const at::Scalar& s) : IValue() { + // NB: do the symbolic versions first, as isFloatingPoint is true + // for both SymFloat and double + if (s.isSymInt()) { + tag = Tag::SymInt; + payload.u.as_intrusive_ptr = s.toSymInt().toSymNode().release(); + } else if (s.isSymFloat()) { + tag = Tag::SymFloat; + payload.u.as_intrusive_ptr = s.toSymFloat().toSymNodeImpl().release(); + } else if (s.isSymBool()) { + tag = Tag::SymBool; + payload.u.as_intrusive_ptr = s.toSymBool().toSymNodeImpl().release(); + } else if (s.isFloatingPoint()) { + tag = Tag::Double; + payload.u.as_double = s.toDouble(); + } else if (s.isComplex()) { + *this = s.toComplexDouble(); + } else if (s.isBoolean()) { + tag = Tag::Bool; + payload.u.as_bool = s.toBool(); + } else { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + s.isIntegral(false), "Unknown type in Scalar"); + if (s.isUnsigned()) { + const auto val = s.toUInt64(); + payload.u.as_uint = val; + tag = val <= std::numeric_limits::max() ? Tag::Int : Tag::UInt; + } else { + payload.u.as_int = s.toLong(); + tag = Tag::Int; + } + } + } + + bool isScalar() const { + return isDouble() || isInt() || isComplexDouble() || isBool() || + isSymInt() || isSymFloat() || isSymBool(); + } + + at::Scalar toScalar() const { + if (isDouble()) + return toDouble(); + else if (isInt()) + return toInt(); + else if (isComplexDouble()) + return toComplexDouble(); + else if (isBool()) + return toBool(); + else if (isSymInt()) + return toSymInt(); + else if (isSymFloat()) + return toSymFloat(); + else if (isSymBool()) + return toSymBool(); + else if (isUnsigned()) + return toUInt(); + TORCH_CHECK(false, "IValue is not a Scalar"); + } + + // Device + IValue(c10::Device d) : tag(Tag::Device) { + payload.u.as_device.type = d.type(); + payload.u.as_device.index = d.index(); + } + bool isDevice() const { + return Tag::Device == tag; + } + c10::Device toDevice() const { + AT_ASSERT(isDevice()); + return c10::Device(payload.u.as_device.type, payload.u.as_device.index); + } + + // Stream + IValue(c10::Stream s) : tag(Tag::Stream) { + auto v = c10::make_intrusive(s.pack3()); + payload.u.as_intrusive_ptr = v.release(); + } + c10::Stream toStream() &&; + c10::Stream toStream() const&; + bool isStream() const { + return Tag::Stream == tag; + } + + // ScalarType + IValue(ScalarType t) + : IValue(static_cast>(t)) {} + at::ScalarType toScalarType() const { + return static_cast(toInt()); + } + + // Layout + IValue(Layout l) : IValue(static_cast>(l)) {} + at::Layout toLayout() const { + return static_cast(toInt()); + } + + // MemoryFormat + IValue(MemoryFormat m) + : IValue(static_cast>(m)) {} + at::MemoryFormat toMemoryFormat() const { + return static_cast(toInt()); + } + + // QScheme + IValue(at::QScheme qscheme) : tag(Tag::Int) { + payload.u.as_int = static_cast(qscheme); + } + + at::QScheme toQScheme() const { + return static_cast(toInt()); + } + + // Dimname + IValue(at::Dimname dimname) : IValue(dimname.symbol().toQualString()) {} + + at::Dimname toDimname() const { + return at::Dimname::fromSymbol(Symbol::fromQualString(toStringRef())); + } + + // Generator + IValue(at::Generator g) : tag(Tag::Generator) { + payload.u.as_intrusive_ptr = + null_to_undefined_tensor(g.unsafeReleaseGeneratorImpl()); + } + bool isGenerator() const { + return Tag::Generator == tag; + } + at::Generator toGenerator() &&; + at::Generator toGenerator() const&; + + // for debugging + std::string tagKind() const { + switch (tag) { +#define DEFINE_CASE(x) \ + case Tag::x: \ + return #x; + TORCH_FORALL_TAGS(DEFINE_CASE) +#undef DEFINE_CASE + } + return "InvalidTag(" + std::to_string(static_cast(tag)) + ")"; + } + + // generic v.to() implementations + // that can be used in special functions like pop/push + // that use template meta-programming. + // prefer the directly named methods when you can, + // since they are simpler to understand + + // Note: if you get linker errors saying one of these is missing, + // change it to ... && = delete; and you will see better error messages for + // why However, we cannot commit this because some compiler versions barf on + // it. + template + T to() &&; + template + typename c10::detail::ivalue_to_const_ref_overload_return::type to() + const&; + + // ToOptional: convert a IValue to the Optional obj that accepts both T and + // None + template + std::optional toOptional(); + template + std::optional toOptional() const; + + /// @private [doxygen private] + /// this is a shallow comparison of two IValues to test the object identity + bool isSameIdentity(const IValue& rhs) const; + + // Computes the "official" string representation of an IValue. This produces a + // TorchScript expression that can be used to recreate an IValue with the same + // value (e.g. when we are printing constants in the serializer). + // + // Callers can use `customFormatter` to override how `repr()` prints out an + // IValue. This is useful if you have some other environment where you can + // look up values, and you want to print a reference to that environment (like + // the serializer's constant table). + // + // repr() is not necessarily defined on all objects! + std::ostream& repr( + std::ostream& stream, + std::function customFormatter) + const; + + // Computes an "informal" string representation of an IValue. This should be + // used for debugging, or servicing `print()`-like functions. + // This is different from `repr()` in that there is no expectation that we can + // exactly reconstruct an IValue from the output; feel free to use a + // concise/pretty form + TORCH_API friend std::ostream& operator<<(std::ostream& out, const IValue& v); + + bool isPtrType() const { + if (isTensor()) { + return payload.as_tensor.defined(); + } + return isIntrusivePtrLegacyBehavior(); + } + + /// @private [doxygen private] + const void* internalToPointer() const { + TORCH_INTERNAL_ASSERT( + isPtrType(), "Can only call internalToPointer() for pointer types"); + if (isTensor()) { + return payload.as_tensor.unsafeGetTensorImpl(); + } else { + return payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton() + ? payload.u.as_intrusive_ptr + : nullptr; + } + } + + template + TypePtr type() const; + + // Detect aliased tensors. + struct HashAliasedIValue { + size_t hashTensor(const at::Tensor& ten) const { + if (ten.is_sparse()) { + // COO sparse tensors have a "values" tensor and an "indices" tensor + // so this will detect overlap of sparse tensors that share a values + // tensor, but not sparse tensors that share an indices tensor. + return hashTensor(ten._values()); + } else if (ten.is_sparse_csr()) { + // COO sparse tensors have a "values" tensor and an "indices" tensor + // so this will detect overlap of sparse tensors that share a values + // tensor, but not sparse tensors that share an indices tensor. + return hashTensor(ten.values()); + } else if (!ten.has_storage()) { + // Opaque tensors such as the ones constructed by the MKL-DNN backend + // don't have storage so we just use their TensorImpls. + // TODO: Find way to expose alias info for opaque tensors. + return reinterpret_cast(ten.unsafeGetTensorImpl()); + } else { + return reinterpret_cast(ten.storage().unsafeGetStorageImpl()); + } + } + size_t operator()(const IValue& val) const { + if (val.isTensor()) { + return hashTensor(val.toTensor()); + } + // If it is not a Tensor, then two mutable IValues alias each other only + // if they are the same pointer. + return val.payload.u.as_int; + } + }; + + struct CompAliasedIValues { + bool operator()(const IValue& lhs, const IValue& rhs) const { + return lhs.isAliasOf(rhs); + } + }; + + using HashAliasedIValues = + std::unordered_set; + using HashAliasedIValueMap = + std::unordered_map; + + struct HashIdentityIValue { + size_t operator()(const IValue& val) const { + return val.payload.u.as_int; + } + }; + + struct CompIdentityIValues { + bool operator()(const IValue& lhs, const IValue& rhs) const { + return lhs.is(rhs); + } + }; + + using HashIdentityIValues = + std::unordered_set; + using HashIdentityIValueMap = + std::unordered_map; + + // Checks if this and rhs has a subvalues in common. + // [t1,t2] and [t2, t3] returns true. + bool overlaps(const IValue& rhs) const; + + // Inserts all subvalues of this in subValues. + void getSubValues(HashAliasedIValues& subValues) const; + + // Apply visitor to every subvalue. + // TODO: There are several places that recurse over IValue. This is fragile. + // This visitor should be used to recurse over ivalues. + void visit(const std::function& visitor) const; + IValue deepcopy(std::optional device = std::nullopt) const; + IValue deepcopy( + HashIdentityIValueMap& memo, + std::optional device = std::nullopt) const; + + private: + static c10::intrusive_ptr_target* null_to_undefined_tensor( + c10::intrusive_ptr_target* p) { + return p ? p + : static_cast( + c10::UndefinedTensorImpl::singleton()); + } + + static bool ptrEqual(const IValue& lhs, const IValue& rhs); + // NOTE: IValue tags are intentionally private. In the future we may encode + // this value different (e.g. using NaN boxing), and this would make it more + // costly to determine the tag for all types vs just determining if something + // is a particular type. Instead we want clients to use the `isX` methods when + // possible. If for performance reasons you really, absolutely, must have a jump + // table, then we can revisit this. + enum class Tag : uint32_t { +#define DEFINE_TAG(x) x, + TORCH_FORALL_TAGS(DEFINE_TAG) +#undef DEFINE_TAG + }; + +#define COUNT_TAG(x) 1 + + static constexpr auto kNumTags = TORCH_FORALL_TAGS(COUNT_TAG) 0; +#undef COUNT_TAG + + template < + class T, + class NullType = c10::detail::intrusive_target_default_null_type> + c10::intrusive_ptr moveToIntrusivePtr(); + template < + typename T, + class NullType = c10::detail::intrusive_target_default_null_type> + c10::intrusive_ptr toIntrusivePtr() const; + + void destroy() { + // We carefully construct this call to both 1) avoid UB by using + // the "wrong" one of as_tensor and as_intrusive_ptr and 2) enable + // the compiler to generate the same code for each case. It is + // surprisingly difficult to get this right. + if (isTensor() || isIntrusivePtr()) { + c10::intrusive_ptr_target* p = isTensor() + ? payload.as_tensor.unsafeGetTensorImpl() + : payload.u.as_intrusive_ptr; + c10::intrusive_ptr:: + reclaim(p); + // No need to make this destructor call! + // payload.as_tensor.~Tensor(); + } + } + + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + C10_ALWAYS_INLINE void moveFrom(IValue&& rhs) noexcept { + if (rhs.isTensor()) { + new (&payload.as_tensor) at::Tensor(std::move(rhs.payload.as_tensor)); + // As far as I can tell, omitting the usual explicit destructor call + // is not UB in and of itself, and it's a slight perf win. The + // destructor is a no-op, because the moved-from Tensor is + // effectively an intrusive_ptr in the null state, so we don't need + // the behavior for correctness reasons either. Leaving this + // explanatory comment, including commented-out destructor call, to + // make this abundantly clear. + // + // rhs.payload.as_tensor.~Tensor(); + } else { + payload.u = rhs.payload.u; + } + tag = rhs.tag; + rhs.clearToNone(); + } + + void clearToNone() noexcept { + payload.u.as_int = 0; + tag = Tag::None; + } + + private: + // This is the source of truth for isIntrusivePtr; edit results here + // as needed and isIntrusivePtr will pick them up. + // NOLINTBEGIN(bugprone-branch-clone) + static constexpr bool isIntrusivePtrConstexpr(Tag tag) { + switch (tag) { + case Tag::None: + return false; + case Tag::Tensor: + return false; + case Tag::Storage: + return true; + case Tag::Generator: + return true; + case Tag::Double: + return false; + case Tag::ComplexDouble: + return true; + case Tag::Int: + return false; + case Tag::UInt: + return false; + case Tag::SymInt: + return true; + case Tag::SymFloat: + return true; + case Tag::SymBool: + return true; + case Tag::Bool: + return false; + case Tag::Tuple: + return true; + case Tag::String: + return true; + case Tag::Blob: + return true; + case Tag::GenericList: + return true; + case Tag::GenericDict: + return true; + case Tag::Future: + return true; + case Tag::Await: + return true; + case Tag::Device: + return false; + case Tag::Stream: + return true; + case Tag::Object: + return true; + case Tag::PyObject: + return true; + case Tag::Uninitialized: + return false; + case Tag::Capsule: + return true; + case Tag::RRef: + return true; + case Tag::Quantizer: + return true; + case Tag::Enum: + return true; + } + return false; + } + // NOLINTEND(bugprone-branch-clone) + + public: + // Don't edit this just to add results for new tags; edit + // isIntrusivePtrConstexpr above. + bool isIntrusivePtr() const { + // Implementation NOTE: the switch in isIntrusivePtrConstexpr + // above is the previous production implementation of this + // function. We observed that, at least on x86_64, the generated + // instruction sequence was a similar bit vector test to what we + // have manually implemented below, except that there was an extra + // "bounds check" branch confirming, essentially, that `tag < + // kNumTags` and providing a consistent result in that case. We + // don't care about the result if tag is out of bounds, so we'd + // like to eliminate that comparison and branch; manually + // implementing this function as a bit test is the simplest way I + // could find to accomplish that elimination. + static constexpr uint32_t kTruthTableBitVector = +#define TRUTH_TABLE_ENTRY(tag) \ + (uint32_t(isIntrusivePtrConstexpr(Tag::tag)) << uint32_t(Tag::tag)) | + TORCH_FORALL_TAGS(TRUTH_TABLE_ENTRY) +#undef TRUTH_TABLE_ENTRY + 0; + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + static_cast(tag) < kNumTags, + "unexpected tag ", + static_cast(tag)); + return kTruthTableBitVector & (1 << (uint32_t(tag) % 32)); + } + + // Storage and Generator were treated specially when + // is_intrusive_ptr was stored as explicit state. This getter + // preserves the old behavior for use with WeakIValue for now. + bool isIntrusivePtrLegacyBehavior() const { + if (tag == Tag::Storage || tag == Tag::Generator) { + return payload.u.as_intrusive_ptr != + c10::UndefinedTensorImpl::singleton(); + } else { + return isIntrusivePtr(); + } + } + + union Payload { + // [TriviallyCopyablePayload] + // We use a nested union here so that we can make the copy easy + // and efficient in the non-tensor (i.e., trivially copyable) + // case. Specifically, we do not have to do a switch-on-tag to + // figure out which union member to assign; we can just use + // TriviallyCopyablePayload::operator=. + union TriviallyCopyablePayload { + TriviallyCopyablePayload() : as_int(0) {} + int64_t as_int; + // See Note [Meaning of HAS_u] + uint64_t as_uint; + double as_double; + bool as_bool; + // Invariant: never nullptr; null state is represented as + // c10::UndefinedTensorImpl::singleton() for consistency of + // representation with Tensor. + c10::intrusive_ptr_target* as_intrusive_ptr; + struct { + c10::DeviceType type; + DeviceIndex index; + } as_device; + } u; + static_assert(std::is_trivially_copyable_v); + at::Tensor as_tensor; + Payload() : u() {} + Payload(const Payload&) = delete; + Payload(Payload&&) = delete; + Payload& operator=(const Payload&) = delete; + Payload& operator=(Payload&&) = delete; + // NOLINTNEXTLINE(modernize-use-equals-default) + ~Payload() {} + }; + + IValue(const Payload& p, Tag t) : tag(t) { + if (isTensor()) { + new (&payload.as_tensor) at::Tensor(p.as_tensor); + } else { + payload.u = p.u; + } + } + + template + struct TagType {}; + + friend MaybeOwnedTraits; + + Payload payload; + Tag tag{IValue::Tag::None}; + friend struct WeakIValue; +}; + +struct TORCH_API WeakIValue final { + WeakIValue() = default; + + WeakIValue(const WeakIValue& rhs) + : payload(rhs.payload), + tag(rhs.tag), + is_intrusive_ptr(rhs.is_intrusive_ptr) { + if (is_intrusive_ptr && + payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::weak_intrusive_ptr::incref(payload.as_intrusive_ptr); + } + } + WeakIValue(const IValue& rhs) + : tag(rhs.tag), is_intrusive_ptr(rhs.isIntrusivePtrLegacyBehavior()) { + if (rhs.isTensor()) { + payload.as_intrusive_ptr = rhs.unsafeToTensorImpl(); + is_intrusive_ptr = true; + } else { + payload = rhs.payload.u; + } + if (is_intrusive_ptr) { + if (payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::weak_intrusive_ptr::incref(payload.as_intrusive_ptr); + } + } + } + WeakIValue(WeakIValue&& rhs) noexcept : WeakIValue() { + swap(rhs); + } + ~WeakIValue() { + if (is_intrusive_ptr && + payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::weak_intrusive_ptr::decref(payload.as_intrusive_ptr); + } + } + WeakIValue& operator=(WeakIValue&& rhs) & noexcept { + WeakIValue(std::move(rhs)).swap(*this); // this also sets rhs to None + return *this; + } + WeakIValue& operator=(WeakIValue const& rhs) & { + WeakIValue(rhs).swap(*this); + return *this; + } + void swap(WeakIValue& rhs) noexcept { + std::swap(payload, rhs.payload); + std::swap(is_intrusive_ptr, rhs.is_intrusive_ptr); + std::swap(tag, rhs.tag); + } + + bool isSameIdentity(const WeakIValue& rhs) const { + return payload.as_int == rhs.payload.as_int && tag == rhs.tag && + is_intrusive_ptr == rhs.is_intrusive_ptr; + } + + IValue lock() const { + if (!is_intrusive_ptr) { + IValue::Payload newPayload; + newPayload.u = payload; + return IValue(newPayload, tag); + } + if (IValue::Tag::Tensor == tag) { + auto temp = + c10::weak_intrusive_ptr:: + reclaim(static_cast(payload.as_intrusive_ptr)); + c10::intrusive_ptr ip( + temp.lock()); + temp.release(); + if (!ip) { + return IValue(); + } else { + return IValue(at::Tensor(std::move(ip))); + } + } else { + auto temp = c10::weak_intrusive_ptr::reclaim( + payload.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton() + ? nullptr + : payload.as_intrusive_ptr); + IValue::Payload pl; + pl.u.as_intrusive_ptr = temp.lock().release(); + temp.release(); + if (!pl.u.as_intrusive_ptr) { + return IValue(); + } else { + return IValue(pl, tag); + } + } + } + + size_t use_count() const noexcept { + if (!is_intrusive_ptr) { + return 1; + } + auto temp = c10::weak_intrusive_ptr< + c10::intrusive_ptr_target, + c10::UndefinedTensorImpl>::reclaim(payload.as_intrusive_ptr); + size_t result = temp.use_count(); + temp.release(); + return result; + } + + size_t weak_use_count() const noexcept { + if (!is_intrusive_ptr) { + return 1; + } + auto temp = c10::weak_intrusive_ptr< + c10::intrusive_ptr_target, + c10::UndefinedTensorImpl>::reclaim(payload.as_intrusive_ptr); + size_t result = temp.weak_use_count(); + temp.release(); + return result; + } + size_t hash() const { + return payload.as_int; + } + + private: + using Payload = IValue::Payload::TriviallyCopyablePayload; + Payload payload; + IValue::Tag tag{IValue::Tag::None}; + bool is_intrusive_ptr{false}; +}; + +// An owning pointer to a type. When the type is class type, it requires a pair +// of shared_ptrs to the class type and its owning CU, so that the class type is +// guaranteed to stay alive as long as we hold this object. +struct TORCH_API StrongTypePtr { + StrongTypePtr(std::shared_ptr cu, TypePtr type); + + std::shared_ptr cu_; + TypePtr type_; +}; + +// [Constant Object Weak CompilationUnit Reference] +// A non owning pointer to a type. When a class get inserted as a constant +// into a graph, if we used a strong pointer we would have a circular reference +// from Object -> CompilationUnit and CompilationUnit -> Graph (which owns the +// Constant Object) +struct TORCH_API WeakTypePtr { + WeakTypePtr(std::weak_ptr cu, TypePtr type); + + std::weak_ptr cu_; + TypePtr type_; +}; + +// internal build errors with std::variant :/ +struct WeakOrStrongCompilationUnit { + explicit WeakOrStrongCompilationUnit( + std::shared_ptr shared_cu) + : strong_ptr_(std::move(shared_cu)), weak_ptr_(std::nullopt) {} + + explicit WeakOrStrongCompilationUnit( + std::weak_ptr weak_cu) + : strong_ptr_(std::nullopt), weak_ptr_(std::move(weak_cu)) {} + + std::shared_ptr getStrongRefOrThrow() const { + TORCH_INTERNAL_ASSERT(strong_ptr_.has_value()); + return *strong_ptr_; + } + + std::weak_ptr getWeakRefOrThrow() const { + TORCH_INTERNAL_ASSERT(weak_ptr_.has_value()); + return *weak_ptr_; + } + + bool holdingStrongRef() const { + return strong_ptr_.has_value(); + } + + bool holdingEmptyStrongRef() const { + return strong_ptr_ == nullptr; + } + + std::optional> strong_ptr_; + std::optional> weak_ptr_; +}; + +// An Object will hold a non-owning Compilation Unit reference if it is a +// Constant in the graph and a Owning reference otherwise +struct TORCH_API WeakOrStrongTypePtr { + explicit WeakOrStrongTypePtr(WeakTypePtr weak) + : cu_(WeakOrStrongCompilationUnit(std::move(weak.cu_))), + type_(std::move(weak.type_)) {} + explicit WeakOrStrongTypePtr(StrongTypePtr strong) + : cu_(WeakOrStrongCompilationUnit(std::move(strong.cu_))), + type_(std::move(strong.type_)) {} + explicit WeakOrStrongTypePtr(WeakOrStrongCompilationUnit cu, TypePtr type) + : cu_(std::move(cu)), type_(std::move(type)) {} + WeakTypePtr asWeakTypePtr() const; + + WeakOrStrongCompilationUnit cu_; + TypePtr type_; + + bool holds_strong_ref() const { + return cu_.holdingStrongRef(); + } + + bool holds_empty_strong_ref() const { + return cu_.holdingEmptyStrongRef(); + } +}; + +} // namespace c10 + +C10_DIAGNOSTIC_POP() + +#include // IWYU pragma: keep + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_inl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..e68c8ba8128d8e32b265e6e891dbead9794768a3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_inl.h @@ -0,0 +1,2578 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-default") + +namespace torch { +namespace jit { +struct Function; +struct CompilationUnit; +} // namespace jit +TORCH_API bool isCustomClass(const c10::IValue& v); +} // namespace torch +namespace c10 { +struct IValue; +struct ClassType; +struct TupleType; +struct EnumType; +struct InferredType; + +// For custom class __init__ registration, we need to pass in a function +// that looks like this: [](IValue x, args...) + +// However, make_boxed_from_unboxed_functor.h automatically sets the input types +// of the function by introspecting the types of the functor (which is IValue in +// this case). However, we need the type it binds to be Foo. + +// Instead, we pass in a lambda [](ivalue_holder x, args...) from +// which getTypePtr can recover the original class pointer. + +template +struct tagged_capsule { + IValue ivalue; +}; + +template +c10::intrusive_ptr IValue::moveToIntrusivePtr() { + auto t = c10::intrusive_ptr::reclaim( + payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton() + ? NullType::singleton() + : static_cast(payload.u.as_intrusive_ptr)); + clearToNone(); + return t; +} +template +c10::intrusive_ptr IValue::toIntrusivePtr() const { + if (payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) { + return c10::intrusive_ptr(); + } + c10::raw::intrusive_ptr::incref(payload.u.as_intrusive_ptr); + return c10::intrusive_ptr::reclaim( + static_cast(payload.u.as_intrusive_ptr)); +} + +template +intrusive_ptr static_intrusive_pointer_cast(intrusive_ptr r) { + return intrusive_ptr::reclaim(static_cast(r.release())); +} + +template +intrusive_ptr dynamic_intrusive_pointer_cast(intrusive_ptr r) { + return intrusive_ptr::reclaim(dynamic_cast(r.release())); +} + +inline c10::intrusive_ptr IValue::toFuture() && { + AT_ASSERT(isFuture(), "Expected Future but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toFuture() const& { + AT_ASSERT(isFuture(), "Expected Future but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toAwait() && { + AT_ASSERT(isAwait(), "Expected Await but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toAwait() const& { + AT_ASSERT(isAwait(), "Expected Await but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toRRef() && { + AT_ASSERT(isRRef(), "Expected RRef but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toRRef() const& { + AT_ASSERT(isRRef(), "Expected RRef but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toQuantizer() && { + AT_ASSERT(isQuantizer(), "Expected Quantizer but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toQuantizer() const& { + AT_ASSERT(isQuantizer(), "Expected Quantizer but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toString() && { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toString() const& { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toObject() && { + AT_ASSERT(isObject(), "Expected Object but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toObject() const& { + AT_ASSERT(isObject(), "Expected Object but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue:: + toPyObjectHolder() && { + TORCH_INTERNAL_ASSERT(isPyObject(), "Expected PyObject but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toPyObjectHolder() + const& { + TORCH_INTERNAL_ASSERT(isPyObject(), "Expected PyObject but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toEnumHolder() && { + TORCH_INTERNAL_ASSERT(isEnum(), "Expected Enum but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toEnumHolder() const& { + TORCH_INTERNAL_ASSERT(isEnum(), "Expected Enum but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::complex IValue::toComplexDouble() const { + TORCH_INTERNAL_ASSERT(isComplexDouble(), "Expected ComplexDouble but got ", tagKind()); + auto ptr = toIntrusivePtr(); + return (*ptr).val; +} +inline at::Tensor IValue::toTensor() && { + if (C10_UNLIKELY(!isTensor())) { + reportToTensorTypeError(); + } + auto result = std::move(payload.as_tensor); + // As far as I can tell, omitting the usual explicit destructor call + // is not UB in and of itself, and it's a slight perf win. The + // destructor is a no-op, because the moved-from Tensor is + // effectively an intrusive_ptr in the null state, so we don't need + // the behavior for correctness reasons either. Leaving this + // explanatory comment, including commented-out destructor call, to + // make this abundantly clear. + // + // payload.as_tensor.~Tensor(); + clearToNone(); + return result; +} +inline at::Tensor& IValue::toTensor() & { + if (C10_UNLIKELY(!isTensor())) { + reportToTensorTypeError(); + } + return payload.as_tensor; +} +inline const at::Tensor& IValue::toTensor() const& { + if (C10_UNLIKELY(!isTensor())) { + reportToTensorTypeError(); + } + return payload.as_tensor; +} +inline c10::Storage IValue::toStorage() && { + AT_ASSERT(isStorage(), "Expected Storage but got ", tagKind()); + return c10::Storage( + moveToIntrusivePtr()); +} +inline c10::Storage IValue::toStorage() const& { + AT_ASSERT(isStorage(), "Expected Storage but got ", tagKind()); + return c10::Storage(toIntrusivePtr()); +} +inline c10::Stream IValue::toStream() && { + AT_ASSERT(isStream(), "Expected Stream but got ", tagKind()); + auto ptr = toIntrusivePtr(); + return c10::Stream::unpack3((*ptr).val.stream_id, + (*ptr).val.device_index, + (*ptr).val.device_type); +} +inline c10::Stream IValue::toStream() const& { + AT_ASSERT(isStream(), "Expected Stream but got ", tagKind()); + auto ptr = toIntrusivePtr(); + return c10::Stream::unpack3((*ptr).val.stream_id, + (*ptr).val.device_index, + (*ptr).val.device_type); +} +inline c10::intrusive_ptr IValue::toBlob() && { + AT_ASSERT(isBlob(), "Expected Blob but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toBlob() const& { + AT_ASSERT(isBlob(), "Expected Blob but got ", tagKind()); + return toIntrusivePtr(); + ; +} +inline c10::intrusive_ptr IValue::toCapsule() && { + TORCH_INTERNAL_ASSERT(isCapsule()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toCapsule() const& { + TORCH_INTERNAL_ASSERT(isCapsule()); + return toIntrusivePtr(); +} +inline at::Generator IValue::toGenerator() && { + AT_ASSERT(isGenerator(), "Expected Generator but got ", tagKind()); + return at::Generator(moveToIntrusivePtr()); +} +inline at::Generator IValue::toGenerator() const& { + AT_ASSERT(isGenerator(), "Expected Generator but got ", tagKind()); + return at::Generator(toIntrusivePtr()); +} +inline c10::SymInt IValue::toSymInt() && { + AT_ASSERT(isSymInt() || isInt(), "Expected SymInt or int but got ", tagKind()); + if (isSymInt()) { + return c10::SymInt(moveToIntrusivePtr()); + } else { + return c10::SymInt(payload.u.as_int); + } +} +inline c10::SymInt IValue::toSymInt() const& { + AT_ASSERT(isSymInt() || isInt(), "Expected SymInt or int but got ", tagKind()); + if (isSymInt()) { + return c10::SymInt(toIntrusivePtr()); + } else { + return c10::SymInt(payload.u.as_int); + } +} +inline c10::SymFloat IValue::toSymFloat() && { + AT_ASSERT(isSymFloat() || isDouble(), "Expected SymFloat or double but got ", tagKind()); + if (isSymFloat()) { + return c10::SymFloat(moveToIntrusivePtr()); + } else { + return c10::SymFloat(payload.u.as_double); + } +} +inline c10::SymFloat IValue::toSymFloat() const& { + AT_ASSERT(isSymFloat() || isDouble(), "Expected SymFloat or double but got ", tagKind()); + if (isSymFloat()) { + return c10::SymFloat(toIntrusivePtr()); + } else { + return c10::SymFloat(payload.u.as_double); + } +} +inline c10::SymBool IValue::toSymBool() && { + AT_ASSERT(isSymBool() || isBool(), "Expected SymBool or boolean but got ", tagKind()); + if (isSymBool()) { + return c10::SymBool(moveToIntrusivePtr()); + } else { + return c10::SymBool(payload.u.as_bool); + } +} + +inline c10::SymBool IValue::toSymBool() const& { + AT_ASSERT(isSymBool() || isBool(), "Expected SymBool or boolean but got ", tagKind()); + if (isSymBool()) { + return c10::SymBool(toIntrusivePtr()); + } else { + return c10::SymBool(payload.u.as_bool); + } +} + +namespace ivalue { + +void TORCH_API +checkCustomClassType(const ClassType* expected_type, const Type* actual_type); + +template +using Shared = c10::intrusive_ptr; + +// string +struct TORCH_API ConstantString final : c10::intrusive_ptr_target { + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string str_; + + public: + ConstantString(std::string str) : str_(std::move(str)) {} + ConstantString(std::string_view str) : str_(std::string(str)) {} + static c10::intrusive_ptr create(std::string str_); + static c10::intrusive_ptr create(std::string_view str_); + static c10::intrusive_ptr create(const char* str_); + + const std::string& string() const { + return str_; + } + std::string_view string_view() const { + return str_; + } + + operator const std::string&() const { + return string(); + } + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const ConstantString& v); +}; + +struct Future; + +struct TORCH_API TupleElements { + private: + size_t inlineSize_; + // We represent TupleElements this way to save doing a heap + // allocation in the common (at least for unpickling) case where we + // have only 3 elements. We have our own union instead of + // c10::SmallVector because c10::SmallVector always + // stores the begin/end/capacity pointers, which would be a waste of + // space in our use case. + union { + std::vector elementsVector_; + // Don't want to declare a std::array because the convenient + // iteration and size members are a footgun in this case -- the + // actual size of the array may be smaller than 3! + // NOLINTNEXTLINE(*c-arrays*) + IValue elementsInline_[3]; + }; + + void destroyInline() { + for (const auto ii : c10::irange(inlineSize_)) { + elementsInline_[ii].~IValue(); + } + } + public: + + using iterator = IValue*; + using const_iterator = const IValue*; + + TupleElements() : inlineSize_(0) { + new (&elementsVector_) std::vector(); + } + + explicit TupleElements(std::vector elements) + : inlineSize_(0), elementsVector_(std::move(elements)) {} + + explicit TupleElements(c10::ArrayRef elements) + : inlineSize_(elements.size() <= 3 ? elements.size() : 0) { + switch (inlineSize_) { + case 3: + new (&elementsInline_[2]) IValue(elements[2]); + [[fallthrough]]; + case 2: + new (&elementsInline_[1]) IValue(elements[1]); + [[fallthrough]]; + case 1: + new (&elementsInline_[0]) IValue(elements[0]); + break; + case 0: + new (&elementsVector_) std::vector(elements.begin(), elements.end()); + break; + } + } + + explicit TupleElements(IValue&& e1) + : inlineSize_(1) { + new (&elementsInline_[0]) IValue(std::move(e1)); + } + + explicit TupleElements(IValue&& e1, IValue&& e2) + : inlineSize_(2) { + new (&elementsInline_[0]) IValue(std::move(e1)); + new (&elementsInline_[1]) IValue(std::move(e2)); + } + + explicit TupleElements(IValue&& e1, IValue&& e2, IValue&& e3) + : inlineSize_(3) { + new (&elementsInline_[0]) IValue(std::move(e1)); + new (&elementsInline_[1]) IValue(std::move(e2)); + new (&elementsInline_[2]) IValue(std::move(e3)); + } + + ~TupleElements() { + if (inlineSize_) { + destroyInline(); + } else { + elementsVector_.~vector(); + } + } + + // It would be nice to make this noncopyable to prevent people from + // writing code like `auto output = + // forward(...).toTupleRef().elements()` (which does refcount bumps on + // each element, unlike the more efficient but verbose + // ``` + // auto outputIntrusivePtr = forward(...).toTuple(); + // const auto& output = outputIntrusivePtr->elements(); + // ``` + // ), but there is simply an overwhelming amount of code that does + // it the inefficient way. + // See also operator std::vector below. + TupleElements(const TupleElements& rhs) + : inlineSize_(rhs.inlineSize_) { + if (rhs.inlineSize_) { + for (const auto ii : c10::irange(inlineSize_)) { + new (&elementsInline_[ii]) IValue(rhs.elementsInline_[ii]); + } + } else { + new (&elementsVector_) std::vector(rhs.elementsVector_); + } + } + + TupleElements& operator=(const TupleElements& rhs) { + if (inlineSize_) { + if (rhs.inlineSize_) { + for (const auto ii : c10::irange(std::min(inlineSize_, rhs.inlineSize_))) { + elementsInline_[ii] = rhs.elementsInline_[ii]; + } + if (rhs.inlineSize_ > inlineSize_) { + for (const auto ii : c10::irange(inlineSize_, rhs.inlineSize_)) { + new (&elementsInline_[ii]) IValue(rhs.elementsInline_[ii]); + } + } else { + for (const auto ii : c10::irange(rhs.inlineSize_, inlineSize_)) { + elementsInline_[ii].~IValue(); + } + } + } else { + destroyInline(); + new (&elementsVector_) std::vector(rhs.elementsVector_); + } + } else { + if (rhs.inlineSize_) { + elementsVector_.~vector(); + for (const auto ii : c10::irange(rhs.inlineSize_)) { + new (&elementsInline_[ii]) IValue(rhs.elementsInline_[ii]); + } + } else { + elementsVector_ = rhs.elementsVector_; + } + } + inlineSize_ = rhs.inlineSize_; + return *this; + } + + TupleElements(TupleElements&& rhs) noexcept + : inlineSize_(rhs.inlineSize_) { + if (inlineSize_) { + for (const auto ii : c10::irange(inlineSize_)) { + new (&elementsInline_[ii]) IValue(std::move(rhs.elementsInline_[ii])); + } + } else { + new (&elementsVector_) std::vector(std::move(rhs.elementsVector_)); + } + } + + TupleElements& operator=(TupleElements&& rhs) noexcept { + if (inlineSize_) { + if (rhs.inlineSize_) { + for (const auto ii : c10::irange(std::min(inlineSize_, rhs.inlineSize_))) { + elementsInline_[ii] = std::move(rhs.elementsInline_[ii]); + } + if (rhs.inlineSize_ > inlineSize_) { + for (const auto ii : c10::irange(inlineSize_, rhs.inlineSize_)) { + new (&elementsInline_[ii]) IValue(std::move(rhs.elementsInline_[ii])); + } + } else { + for (const auto ii : c10::irange(rhs.inlineSize_, inlineSize_)) { + elementsInline_[ii].~IValue(); + } + } + } else { + destroyInline(); + new (&elementsVector_) std::vector(std::move(rhs.elementsVector_)); + } + } else { + if (rhs.inlineSize_) { + elementsVector_.~vector(); + for (const auto ii : c10::irange(rhs.inlineSize_)) { + new (&elementsInline_[ii]) IValue(std::move(rhs.elementsInline_[ii])); + } + } else { + elementsVector_ = std::move(rhs.elementsVector_); + } + } + inlineSize_ = rhs.inlineSize_; + return *this; + } + + [[nodiscard]] c10::ArrayRef asArrayRef() const { + if (inlineSize_) { + return c10::ArrayRef(elementsInline_, inlineSize_); + } else { + return elementsVector_; + } + } + + // Mimic implicit conversion from std::vector to ArrayRef. + operator c10::ArrayRef() const { + return asArrayRef(); + } + + static size_t hash(const TupleElements& v) { + return c10::hash>()(v.asArrayRef()); + } + + void setContents(std::vector&& contents) { + if (inlineSize_) { + destroyInline(); + new (&elementsVector_) std::vector(std::move(contents)); + inlineSize_ = 0; + } else { + elementsVector_ = std::move(contents); + } + } + + [[nodiscard]] bool empty() const { + return inlineSize_ ? false : elementsVector_.empty(); + } + + [[nodiscard]] size_t size() const { + return inlineSize_ ? inlineSize_ : elementsVector_.size(); + } + + [[nodiscard]] IValue& operator[](size_t idx) { + if (inlineSize_) { + return elementsInline_[idx]; + } else { + return elementsVector_[idx]; + } + } + + [[nodiscard]] const IValue& operator[](size_t idx) const { + if (inlineSize_) { + return elementsInline_[idx]; + } else { + return elementsVector_[idx]; + } + } + + [[nodiscard]] IValue& at(size_t idx) { + if (inlineSize_) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inlineSize_ <= 3); + TORCH_CHECK(idx < inlineSize_, "TupleElements: invalid index Index = ", idx, "; Length = ", inlineSize_); + return elementsInline_[idx]; + } else { + return elementsVector_.at(idx); + } + } + + [[nodiscard]] const IValue& at(size_t idx) const { + if (inlineSize_) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inlineSize_ <= 3); + TORCH_CHECK(idx < inlineSize_, "TupleElements: invalid index Index = ", idx, "; Length = ", inlineSize_); + return elementsInline_[idx]; + } else { + TORCH_CHECK(idx < elementsVector_.size(), "TupleElements: invalid index Index = ", idx, "; Length = ", elementsVector_.size()); + return elementsVector_.at(idx); + } + } + + [[nodiscard]] iterator begin() { + if (inlineSize_) { + return elementsInline_; + } else { + return elementsVector_.data(); + } + } + + [[nodiscard]] iterator end() { + if (inlineSize_) { + return elementsInline_ + inlineSize_; + } else { + return elementsVector_.data() + elementsVector_.size(); + } + } + + [[nodiscard]] const_iterator begin() const { + if (inlineSize_) { + return elementsInline_; + } else { + return elementsVector_.data(); + } + } + + [[nodiscard]] const_iterator end() const { + if (inlineSize_) { + return elementsInline_ + inlineSize_; + } else { + return elementsVector_.data() + elementsVector_.size(); + } + } + + [[nodiscard]] const_iterator cbegin() const { + return begin(); + } + + [[nodiscard]] const_iterator cend() const { + return end(); + } + + [[nodiscard]] std::vector vec() const& { + return asArrayRef().vec(); + } + + [[nodiscard]] IValue& back() { + return *(end() - 1); + } + + [[nodiscard]] const IValue& back() const { + return *(end() - 1); + } + + [[nodiscard]] std::vector vec() && { + std::vector result; + result.reserve(size()); + for (auto&& iv : *this) { + result.push_back(std::move(iv)); + } + return result; + } + + // More compatibility shims for the overwhelming amount of code that + // likes to copy tuple elements into a vector; see comment above the + // copy constructor. + operator std::vector() const & { + return vec(); + } + + operator std::vector() && { + return vec(); + } +}; + +template +struct TupleTypeFactory {}; + +template <> +struct TORCH_API TupleTypeFactory { + static TupleTypePtr create(std::vector types) { + return TupleType::create(std::move(types)); + } + static TupleTypePtr fallback(const Type& type); +}; + +template <> +struct TORCH_API TupleTypeFactory { + static DynamicTypePtr create(const std::vector& elemTypes); + static DynamicTypePtr fallback(const Type& /*unused*/); +}; + +struct TORCH_API Tuple : c10::intrusive_ptr_target { + private: + TupleElements elements_; + mutable c10::TypePtr type_; // lazily computed for unnamed tuples + + public: + // named tuples have additional type information, so we + // directly create them tagged + static c10::intrusive_ptr createNamed( + std::vector elements_, + c10::TypePtr type_) { + return c10::make_intrusive(std::move(elements_), std::move(type_)); + } + + static c10::intrusive_ptr createNamed( + TupleElements elements_, + std::shared_ptr type_) { + return c10::make_intrusive(std::move(elements_), std::move(type_)); + } + + static c10::intrusive_ptr createNamed( + std::initializer_list elements_, + std::shared_ptr type_) { + return createNamed(TupleElements(c10::ArrayRef(elements_)), std::move(type_)); + } + + // MSVC apparently can't disambiguate the other two overloads of + // create when passed an initializer_list without this. + static c10::intrusive_ptr create(std::initializer_list elements_) { + return create(c10::ArrayRef(elements_)); + } + + static c10::intrusive_ptr create(std::vector elements_) { + return c10::make_intrusive(std::move(elements_)); + } + + static c10::intrusive_ptr create(TupleElements elements_) { + return c10::make_intrusive(std::move(elements_)); + } + + static c10::intrusive_ptr create(c10::ArrayRef elements_) { + return create(TupleElements(elements_)); + } + + static c10::intrusive_ptr create(IValue e1) { + return c10::make_intrusive(std::move(e1)); + } + + static c10::intrusive_ptr create(IValue e1, IValue e2) { + return c10::make_intrusive(std::move(e1), std::move(e2)); + } + + static c10::intrusive_ptr create(IValue e1, IValue e2, IValue e3) { + return c10::make_intrusive(std::move(e1), std::move(e2), std::move(e3)); + } + + private: + // Workaround inability to use `>` operator in template argument list. + template + static constexpr bool hasMoreThanThreeArgs() { + return sizeof...(Args) > 3; + } + + public: + template + static c10::intrusive_ptr create(Args&&... elements_) { + switch (sizeof...(Args)) { + case 1: + case 2: + case 3: + return create(IValue(std::forward(elements_))...); + default: + return create( + std::vector{IValue(std::forward(elements_))...}); + } + } + + // Again, it would be nice to make this noncopyable, but there's a + // lot of extant code that copies Tuples. + // Tuple(const Tuple& rhs) = delete; + + const TupleElements& elements() const& { + return elements_; + } + + TupleElements elements() && { + return std::move(elements_); + } + + void setElements(std::vector&& elements) { + elements_.setContents(std::move(elements)); + } + + void setElements(TupleElements&& elements) { + elements_ = std::move(elements); + } + + void unsafeSetElement(size_t idx, const IValue& element) { + elements_[idx] = element; + } + + void unsafeSetElement(size_t idx, IValue&& element) { + elements_[idx] = std::move(element); + } + + size_t size() const { + return elements_.size(); + } + + template + std::shared_ptr type() const { + if (!type_) { + type_ = TupleTypeFactory::create(fmap(elements(), [&](const IValue& v) { + return v.type(); + })); + } + if (auto t = type_->cast()) { + return t; + } + return TupleTypeFactory::fallback(*type_); + } + + static size_t hash(const Tuple& t) { + return c10::get_hash(t.elements()); + } + + TORCH_API friend bool operator==( + const ivalue::Tuple& lhs, + const ivalue::Tuple& rhs); + + private: + // NOTE: If we try to avoid the overloads without + // `std::shared_ptr type` by defaulting it to nullptr, we + // end up having to call (part of) the shared_ptr destructor for + // `type` even though we should know statically it won't do + // anything. + explicit Tuple(std::vector elements) + : elements_(std::move(elements)){} + + explicit Tuple(std::vector elements, c10::TypePtr type) + : elements_(std::move(elements)), type_(std::move(type)) {} + + explicit Tuple(TupleElements&& elements) + : elements_(std::move(elements)) {} + + explicit Tuple(TupleElements&& elements, std::shared_ptr type) + : elements_(std::move(elements)), type_(std::move(type)) {} + + explicit Tuple(IValue&& e1) + : elements_(std::move(e1)) {} + + explicit Tuple(IValue&& e1, std::shared_ptr type) + : elements_(std::move(e1)), type_(std::move(type)) {} + + explicit Tuple(IValue&& e1, IValue&& e2) + : elements_(std::move(e1), std::move(e2)) {} + + explicit Tuple(IValue&& e1, IValue&& e2, std::shared_ptr type) + : elements_(std::move(e1), std::move(e2)), type_(std::move(type)) {} + + explicit Tuple(IValue&& e1, IValue&& e2, IValue&& e3) + : elements_(std::move(e1), std::move(e2), std::move(e3)) {} + + explicit Tuple(IValue&& e1, IValue&& e2, IValue&& e3, std::shared_ptr type) + : elements_(std::move(e1), std::move(e2), std::move(e3)), type_(std::move(type)) {} + + friend class c10::intrusive_ptr; +}; + +struct Object; +struct PyObjectHolder; +struct EnumHolder; +} // namespace ivalue + +// Future +struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target { + private: + // Keep this private in order to force users to go through make_intrusive and + // thus prevent creating a Future that's not held by an intrusive_ptr. + explicit Future(TypePtr type, std::vector devices={}) + : type_(std::move(type)), + impl_(getTypeOfDevices(devices)), + devices_(sortAndDeduplicateDevices(impl_, std::move(devices))) {} + + friend c10::intrusive_ptr; + + struct FutureCallback { + std::function callback; + bool uses_future; // whether the Future& passed in is actually used + + template + FutureCallback(T callback, bool uses_future) + : callback(std::move(callback)), uses_future(uses_future) {} + }; + + public: + Future(const Future&) = delete; + Future(Future&&) = delete; + Future& operator=(const Future&) = delete; + Future& operator=(Future&&) = delete; + + // Destructor + // Explicitly destroy events under device guard, otherwise it can lead to + // extra context being created on device 0. Reason: python garbage collector + // calls this destructor, but python GC does not have a device context, so a + // "default" one (usually on device 0) could be created when we go down the + // line of event destroy. + ~Future() override { + while (!events_.empty()) { + c10::OptionalDeviceGuard deviceGuard(events_.back().device()); + events_.pop_back(); + } + } + + struct TORCH_API FutureError final : public std::exception { + explicit FutureError(std::string&& error_msg_) + : error_msg(std::move(error_msg_)) {} + + FutureError() = default; + + const char* what() const noexcept override { + return error_msg.c_str(); + } + + std::string error_msg; + }; + + /** + * Wait on the future until it completes. + */ + void wait() { + std::unique_lock lock(mutex_); + finished_cv_.wait(lock, [&]() -> bool { return completed_; }); + synchronizeWithCurrentStreams(); + } + + /** + * Wait on the future until it completes and throw an + * exception if an error exists. + */ + void waitAndThrow() { + wait(); + + if (eptr_) { + std::rethrow_exception(eptr_); + } + } + + /** + * Explicitly mark the future as completed with the output value. Optionally, + * the storages for all tensors in IValue can be passed as well. The DataPtrs + * of these storages are used to synchronize CUDA streams. If storages isn't + * given we will attempt to extract it from the value, if we need to (this + * happens if a non-empty set of devices was given to the constructor). Thus + * one only needs to provide storages when 1) they cannot be extracted through + * IValue::getSubValues() or through pickling in case of Python object; or + * when 2) customized storage extraction is more efficient. + */ + using WeakStorage = c10::weak_intrusive_ptr; + void markCompleted( + IValue value, + std::optional> storages = std::nullopt) { + // Start by performing all steps that can throw, before setting any field. + // Do this before even acquiring the mutex, because extractStorages might + // acquire the GIL, which could lead to a lock inversion with our mutex. + // See https://github.com/pytorch/pytorch/issues/58239. + std::vector actualStorages; + std::vector usedDevices; + try { + // FIXME We should always extract DataPtrs, in order to catch the case of + // users using CUDA values but forgetting to set devices, which currently + // leads to a silent synchronization/correctness issue. However, as this + // might worsen perf in CPU-only cases, we should only do so after careful + // benchmarks. + if (impl_.type() != c10::kCPU) { + actualStorages = + storages.has_value() ? std::move(*storages) : extractStorages(value); + usedDevices = getDevicesOfStorages(impl_, actualStorages); + ensureIsSubsetOfDevices(usedDevices, devices_); + } + } catch (const std::exception&) { + setError(std::current_exception()); + return; + } + + std::unique_lock lock(mutex_); + TORCH_CHECK( + !completed(), + "Attempting to mark a completed Future as complete again. Note that " + "a Future can only be marked completed once."); + + // Only set value_ and completed_ flag once all checks and preparation steps + // have returned successfully to allow for proper error propagation. + value_ = std::move(value); + completed_ = true; + + currentDevice_ = impl_.getDevice(); + storages_ = std::move(actualStorages); + for (const c10::Device& device : usedDevices) { + c10::Event event(impl_.type()); + event.record(impl_.getStream(device)); + events_.push_back(std::move(event)); + } + + std::vector cbs; + cbs.swap(callbacks_); + lock.unlock(); + + finished_cv_.notify_all(); + for (const auto& callback : cbs) { + invokeCallback(callback.callback, callback.uses_future); + } + } + + void markCompleted() { + markCompleted(IValue{}); + } + + void setError(std::exception_ptr eptr) { + std::unique_lock lock(mutex_); + setErrorInternal(std::move(eptr), lock); + } + + void setErrorIfNeeded(std::exception_ptr eptr) { + std::unique_lock lock(mutex_); + if (completed_) { + // This should be rare and shouldn't cause log spew. Its important to + // log errors and that's why we have this log here. + std::string msg = c10::str( + "Skipping setting following error on the Future since " + "it is already marked completed (this is not necessarily " + "an error):\n", + tryRetrieveErrorMessageInternal(std::move(eptr))); + if (eptr_) { + msg += c10::str( + ", \nOriginal exception:\n", + tryRetrieveErrorMessageInternal(eptr_)); + } + LOG(INFO) << msg; + return; + } else { + setErrorInternal(std::move(eptr), lock); + } + } + + // Get the result of the current future. + IValue value() { + std::unique_lock lock(mutex_); + AT_ASSERT(completed()); + if (eptr_) { + std::rethrow_exception(eptr_); + } + return value_; + } + + // This accessor should only be used if we know that the future is + // completed() with no error. + const IValue& constValue() const { + std::unique_lock lock(mutex_); + AT_ASSERT(completed()); + TORCH_INTERNAL_ASSERT( + !eptr_, + "value() accessor should only be used when future is not completed with ", + "an error, but future had the following error: ", + tryRetrieveErrorMessageInternal(eptr_) + ); + return value_; + } + + // This accessor should only be used if we know that the future is + // completed() with no error. + const std::vector& storages() const { + std::unique_lock lock(mutex_); + AT_ASSERT(completed()); + AT_ASSERT(!eptr_); + return storages_; + } + + /** + * Add a callback to the future. + * The callbacks will be executed once the future completes. + * If the future has already completed, + * this function will execute the callback immediately. + */ + template + void addCallback(T callback, bool uses_future = true) { + static_assert( + std::is_invocable_r_v, + "The callback must have signature void(Future&)"); + + std::unique_lock lock(mutex_); + if (completed()) { + lock.unlock(); + invokeCallback(callback, uses_future); + return; + } + callbacks_.emplace_back(std::move(callback), uses_future); + } + + /** + * Add a callback to the future, and return another Future to hold the return + * value of the callback. This is necessary when the callback provider needs + * to know for sure when the callback has finished. + */ + template + c10::intrusive_ptr then(T callback, TypePtr type) { + using IValueWithStorages = std::tuple>; + static_assert( + std::disjunction_v< + std::is_invocable_r, + std::is_invocable_r>, + "The callback must have signature IValue(Future&) or " + "std::tuple>(Future&)"); + + auto childFut = createInstance(::std::move(type)); + addCallback([childFut, + cb = std::move(callback)](Future& parentFut) { + try { + if constexpr (::std::is_convertible_v, IValueWithStorages>) { + auto [ivalue, storages] = cb(parentFut); + childFut->markCompleted(::std::move(ivalue), ::std::move(storages)); + } else { + childFut->markCompleted(cb(parentFut)); + } + } catch (std::exception&) { + childFut->setError(std::current_exception()); + } + }); + return childFut; + } + + template + c10::intrusive_ptr thenAsync(T callback, TypePtr type) { + static_assert( + std::is_invocable_r_v, T, Future&>, + "The callback must have signature c10::intrusive_ptr(Future&)"); + + auto childFut = createInstance(std::move(type)); + addCallback( + [childFut, cb = std::move(callback)](Future& parentFut) mutable { + c10::intrusive_ptr intermediateFut; + try { + intermediateFut = cb(parentFut); + } catch (std::exception&) { + childFut->setError(std::current_exception()); + return; + } + intermediateFut->addCallback( + [childFut = std::move(childFut)](Future& intermediateFut) { + if (intermediateFut.hasError()) { + childFut->setError(intermediateFut.exception_ptr()); + } else { + childFut->markCompleted( + intermediateFut.value(), intermediateFut.storages()); + } + }); + }); + return childFut; + } + + // Tries to retrieve the error message from std::exception_ptr. + std::string tryRetrieveErrorMessage() const { + TORCH_CHECK(hasError(), "No error present on the future."); + std::unique_lock lock(mutex_); + return tryRetrieveErrorMessageInternal(eptr_); + } + + // Check if the current future has completed + bool completed() const { + return completed_; + } + + bool hasValue() const { + std::unique_lock lock(mutex_); + return completed_ && !eptr_; + } + + bool hasError() const { + std::unique_lock lock(mutex_); + return eptr_ ? true : false; + } + + std::exception_ptr exception_ptr() const { + std::unique_lock lock(mutex_); + return eptr_; + } + + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const Future& v); + + const TypePtr& elementType() const { + return type_; + } + + const std::vector& devices() const { + return devices_; + } + + // This method should be used when one intends to manually create a child + // future, for example when implementing a customized version of then(). + c10::intrusive_ptr createInstance(at::TypePtr type) { + return c10::make_intrusive(std::move(type), devices_); + } + + private: + + // This method should always be used when invoking a callback (regardless of + // how/when that happens) as it will ensure that the proper "environment" is + // set up before running the callback, as in, it will set up the CUDA streams, + // synchronize them with the value, and so on (if needed). + template + void invokeCallback(T& callback, bool uses_future) { + static_assert( + std::is_invocable_r_v, + "The callback must have signature void(Future&)"); + + // The synchronization performed below shouldn't be needed when the future + // is not used by the callback. + if (uses_future) { + c10::OptionalDeviceGuard deviceGuard(currentDevice_); + + std::vector streams; + streams.reserve(devices_.size()); + for (const c10::Device& device : devices_) { + streams.push_back(impl_.getStreamFromGlobalPool(device)); + } + c10::MultiStreamGuard streamGuard(streams); + synchronizeWithCurrentStreams(); + callback(*this); + } else { + callback(*this); + } + } + + // This method should be called before this future's value is used, as it + // ensures that the CUDA streams that are "current" at the callsite properly + // synchronize with the value. + void synchronizeWithCurrentStreams() { + for (c10::Event& event : events_) { + event.block(impl_.getStream(event.device())); + } + + for (const WeakStorage& weak_storage : storages_) { + c10::intrusive_ptr storage = weak_storage.lock(); + if (!storage) { + continue; + } + if (!storage->device().is_cpu()) { + impl_.recordDataPtrOnStream( + storage->data_ptr(), impl_.getStream(storage->device())); + } + } + } + + void setErrorInternal( + std::exception_ptr eptr, + std::unique_lock& lock) { + TORCH_CHECK( + !eptr_, + "Error already set on this Future: ", + tryRetrieveErrorMessageInternal(eptr_), + ", trying to set error: ", + tryRetrieveErrorMessageInternal(eptr)); + TORCH_INTERNAL_ASSERT(!completed(), "Future is already marked completed"); + completed_ = true; + eptr_ = std::move(eptr); + + std::vector cbs; + cbs.swap(callbacks_); + lock.unlock(); + + finished_cv_.notify_all(); + for (const auto& callback : cbs) { + invokeCallback(callback.callback, callback.uses_future); + } + } + + // Tries to retrieve the error message from std::exception_ptr. + std::string tryRetrieveErrorMessageInternal(std::exception_ptr eptr) const { + try { + std::rethrow_exception(std::move(eptr)); + } catch (const std::exception& e) { + return e.what(); + } catch (...) { + return "Unknown Exception Type"; + } + } + + // Defined in ivalue.cpp. + static std::vector extractStorages( + const at::IValue& value); + + static std::vector getDevicesOfStorages( + const c10::impl::VirtualGuardImpl& impl, + const std::vector& storages) { + c10::DeviceIndex deviceCount = impl.deviceCount(); + std::vector isDeviceUsed(deviceCount, false); + for (const WeakStorage& weak_storage : storages) { + c10::intrusive_ptr storage = weak_storage.lock(); + if (!storage) { + continue; + } + c10::Device device = storage->device(); + if (!device.is_cpu()) { + TORCH_CHECK_VALUE( + device.type() == impl.type(), + "Expected all data ptrs to be on a device of type ", + impl.type(), + ", got one on device ", + device); + isDeviceUsed[device.index()] = true; + } + } + std::vector devices; + for (c10::DeviceIndex idx = 0; idx < deviceCount; idx++) { + if (isDeviceUsed[idx]) { + devices.emplace_back(impl.type(), idx); + } + } + return devices; + } + + static std::string formatSetOfDevices( + const std::vector& devices) { + if (devices.empty()) { + return "(none)"; + } + std::ostringstream oss; + oss << devices[0]; + for (const auto idx : c10::irange(1, devices.size())) { + if (idx == devices.size() - 1) { + oss << " and "; + } else { + oss << ", "; + } + oss << devices[idx]; + } + return oss.str(); + } + + static c10::DeviceType getTypeOfDevices( + const std::vector& devices) { + if (devices.empty()) { + return c10::kCPU; + } + c10::DeviceType deviceType = devices[0].type(); + for (const auto idx : c10::irange(1, devices.size())) { + TORCH_CHECK_VALUE( + devices[idx].type() == deviceType, + "Expected all devices to be of the same type, but got a mismatch between ", + devices[0], + " and ", + devices[idx]); + } + return deviceType; + } + + // We need devices to be sorted in order to use ensureIsSubsetOfDevices. + static std::vector sortAndDeduplicateDevices( + const c10::impl::VirtualGuardImpl& /*impl*/, + std::vector devices) { + std::sort( + devices.begin(), devices.end(), + [](const c10::Device& a, const c10::Device& b) { return a.index() < b.index(); }); + // Deduplicate by compacting. + size_t targetIdx = 0; + for (const auto sourceIdx : c10::irange(devices.size())) { + TORCH_CHECK_VALUE( + devices[sourceIdx].has_index(), + "Expected devices to have indices, got ", devices[sourceIdx]); + if (targetIdx > 0 && devices[targetIdx - 1].index() == devices[sourceIdx].index()) { + // It's a duplicate, skip it. + continue; + } + if (sourceIdx != targetIdx) { + devices[targetIdx] = devices[sourceIdx]; + } + targetIdx++; + } + // If there were duplicates there's now a gap at the end: trim it. Resizing + // requires the item type to be default-constructible (which c10::Device is + // not) because in principle it could be required to create new items. Since + // we know we'll shrink the vector, we provide a custom dummy value instead. + devices.resize(targetIdx, c10::Device(c10::kCPU)); + return devices; + } + + static void ensureIsSubsetOfDevices( + const std::vector& subset, + const std::vector& superset) { + // We assume the devices in both vectors have the same consistent type, and + // their indices are unique and sorted. + std::vector excessDevices; + std::set_difference( + subset.begin(), + subset.end(), + superset.begin(), + superset.end(), + std::back_inserter(excessDevices), + [](const c10::Device& a, const c10::Device& b) { return a.index() < b.index(); }); + TORCH_CHECK_VALUE( + excessDevices.empty(), + "The result contained tensors residing on device(s) ", + formatSetOfDevices(excessDevices), + " which are not among the expected device(s) ", + formatSetOfDevices(superset)); + } + + mutable std::mutex mutex_; + std::atomic_bool completed_ = {false}; // is this future complete + std::condition_variable finished_cv_; + + IValue value_; // when finished the value + TypePtr type_; + std::vector callbacks_; + std::exception_ptr eptr_; + + // An upcast pointer to a virtual class which allows us to manipulate events, + // streams, ... in a generic way, without an explicit dependency on CUDA. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const c10::impl::VirtualGuardImpl impl_; + + // The device that was current when markCompleted was called, which we'll + // restore when invoking callbacks. It's optional because we'll only store it + // if the future completes successfully. + std::optional currentDevice_; + + // The events that correspond to the completion of the async I/O kernels. They + // are recorded on the appropriate streams when the future is marked completed + // and can then be queried/waited/blocked on. There is one event for each + // distinct device on which the value's tensors reside. + std::vector events_; + + // A cached version of the storages extracted from the value when the future + // is first marked completed. + std::vector storages_; + + // The bounding set of devices that this future, and any of its children, is + // allowed to use. This is a superset of the set of devices used by the events + // above. We need this to know what streams (for which devices) to set as + // current when invoking a callback, thus allowing the callback to use devices + // that the parent future didn't use. This field is set to the value provided + // in the constructor and will be "inherited" by all child futures. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::vector devices_; +}; + +struct C10_EXPORT ivalue::Await final : c10::intrusive_ptr_target { + private: + explicit Await(TypePtr elType, std::function fn) + : elType_(std::move(elType)), type_(AwaitType::create(elType_)), fn_(std::move(fn)) {} + + explicit Await(TypePtr elType) : elType_(std::move(elType)), type_(AwaitType::create(elType_)) { } + + friend c10::intrusive_ptr; + + public: + Await(const Await&) = delete; + Await(Await&&) = delete; + Await& operator=(const Await&) = delete; + Await& operator=(Await&&) = delete; + ~Await() override = default; + + IValue wait() { + if (!completed_) { + TORCH_CHECK(fn_, "Incompleted Await: fn can't be None"); + value_ = fn_(); + completed_ = true; + args_ = {}; + } + return value_; + } + + IValue value() { + TORCH_CHECK(completed_, "Await must be completed"); + return value_; + } + + void setFn(std::function fn) { + fn_ = std::move(fn); + } + + bool completed() { + return completed_; + } + + void markCompleted(IValue value) { + value_ = std::move(value); + completed_ = true; + } + + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const Await& v); + + const TypePtr& elementType() const { + return elType_; + } + + const TypePtr& type() const { + return type_; + } + + void setArgs(std::vector args) { + args_ = std::move(args); + } + + std::vector& args() { + return args_; + } + + private: + TypePtr elType_; + TypePtr type_; + std::vector args_; + std::function fn_; + IValue value_; + bool completed_{}; +}; + +// Input is a list of Futures with the same target type. +// Output is a Future to the List of completed Futures. +TORCH_API intrusive_ptr collectAll( + const c10::List>& srcs); +// Input is a List of Futures with the same target type. +// Output is a Future that will be updated with a seen value. +TORCH_API intrusive_ptr collectAny( + const c10::List>& srcs); + +// User-defined object. +struct C10_EXPORT ivalue::Object final : c10::intrusive_ptr_target { + public: + // In general, class types hold a shared_ptr to its owning CompilationUnit, + // so that its type and methods do not get deallocated while the class exists. + // However, the CompilationUnit holds ownership of the type's graphs, so + // inserting a constant object into a Graph would create a reference cycle if + // that constant object held a shared_ptr to its CU. For these objects we + // instantiate them with non-owning references to its CU + Object(WeakOrStrongTypePtr type, size_t numSlots) : type_(std::move(type)) { + slots_.resize(numSlots); + } + + Object(StrongTypePtr type, size_t numSlots) + : type_(WeakOrStrongTypePtr(std::move(type))) { + slots_.resize(numSlots); + } + + static c10::intrusive_ptr create( + WeakOrStrongTypePtr type, + size_t numSlots) { + return c10::make_intrusive(std::move(type), numSlots); + } + + static c10::intrusive_ptr create( + StrongTypePtr type, + size_t numSlots) { + return c10::make_intrusive(std::move(type), numSlots); + } + + static c10::intrusive_ptr create(ClassTypePtr classType, size_t numSlots); + + /** + * Slot API. + * + * Attributes are stored as a simple vector so that lookups are fast at + * runtime. A "slot" is just an index into that vector, which can be computed + * statically if you have access to the class type. Use this API if you are + * writing compiler stuff. + */ + void setSlot(size_t slot, IValue v) { + if (slot >= slots_.size()) { + // for module types, it is possible that the members of the class have + // expanded after the object was created. In this case, we expand + // the slots to the right size + resizeObject(slot); + } + slots_[slot] = std::move(v); + } + + const IValue& getSlot(size_t slot) const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(slot < slots_.size()); + // NOTE: This lookup is fairly hot, so we use unchecked access to the + // vector. Errors should still be detectable with ASan. + return slots_[slot]; + } + + void unsafeRemoveSlot(size_t slot) { + TORCH_CHECK(slot < slots_.size()); + slots_.erase(slots_.begin() + static_cast(slot)); + } + + /** + * Attribute API. + * + * Wrappers around the slot stuff so that users can access attributes + * directly. Use this API if you are a user. + * + * Note: Unlike in Python, TorchScript must make a distinction between + * attributes (which are IValues) and methods (which are Methods). If you + * want a method, use `obj.type()->getMethod()` + */ + IValue getAttr(const std::string& name) const; + void setAttr(const std::string& name, IValue v); + // Remove attribute by name, caller is responsible for + // the safety of this operation + // We didn't remove the attribute in the type because the type + // might be shared by multiple objects. + // Therefore after removing attribute, the object is in an inconsistent + // state where it has more attribute types in its Type than + // the attribute slots it has, user needs to make sure the object + // has consistent by removing the attribute in type as well + void unsafeRemoveAttr(const std::string& name); + + std::string name() const; + + const std::vector& slots() const { + return slots_; + } + std::shared_ptr type() const; + + std::shared_ptr compilation_unit() { + if (type_.holds_strong_ref()) { + return type_.cu_.getStrongRefOrThrow(); + } else { + auto weak_ptr = type_.cu_.getWeakRefOrThrow(); + return std::shared_ptr(weak_ptr); + } + } + + c10::intrusive_ptr copy_to_weak_compilation_ref() const; + + void unsafe_make_weak_compilation_ref() { + type_ = WeakOrStrongTypePtr(type_.asWeakTypePtr()); + } + + c10::intrusive_ptr copy() const; + + c10::intrusive_ptr deepcopy( + std::optional device = std::nullopt) const; + + c10::intrusive_ptr deepcopy( + IValue::HashIdentityIValueMap& memo, + std::optional device = std::nullopt) const; + + bool is_weak_compilation_ref() const { + return !type_.holds_strong_ref(); + } + + bool is_empty_strong_compilation_ref() const { + return type_.holds_empty_strong_ref(); + } + + private: + void resizeObject(size_t slot); + WeakOrStrongTypePtr type_; + std::vector slots_; +}; + +// virtual ivalue PyObjectHolder that hold a py::object, we make this virtual +// because the py::object and refcounting logic should happen in libtorch_python +// see concrete implementation in python_ivalue.h +struct ivalue::PyObjectHolder : c10::intrusive_ptr_target { + public: + virtual PyObject* getPyObject() = 0; + virtual c10::InferredType tryToInferType() = 0; + virtual IValue toIValue(const TypePtr& type, std::optional N = std::nullopt) = 0; + virtual std::string toStr() = 0; + virtual std::vector extractTensors() = 0; + + ~PyObjectHolder() override = default; +}; + +struct ivalue::EnumHolder : c10::intrusive_ptr_target { + public: + EnumHolder(std::shared_ptr type, std::string name, IValue value) + : type_(std::move(type)), + name_(std::move(name)), + value_(std::move(value)) {} + + bool is(const ivalue::EnumHolder& rhs) { + return *this == rhs; + } + + friend bool operator==( + const ivalue::EnumHolder& lhs, + const ivalue::EnumHolder& rhs); + + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const ivalue::EnumHolder& v); + + TORCH_API const std::string& qualifiedClassName() const; + + const std::string& unqualifiedClassName() const; + + const std::string& name() const { + return name_; + } + + const IValue& value() const { + return value_; + } + + std::shared_ptr type() const { + return type_; + } + + private: + std::shared_ptr type_; + std::string name_; + IValue value_; +}; + +#undef TORCH_FORALL_TAGS + +namespace detail { + +struct _guarded_unsigned_long_unique_dummy final { + _guarded_unsigned_long_unique_dummy(int64_t /*unused*/){} +}; +using _guarded_unsigned_long = std::conditional_t< + std::is_same_v || + std::is_same_v, + _guarded_unsigned_long_unique_dummy, + unsigned long>; + +} // namespace detail + +inline ivalue::Object& IValue::toObjectRef() const { + AT_ASSERT(isObject(), "Expected Object but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), "Attempted to create null reference"); + return *static_cast(payload.u.as_intrusive_ptr); +} + +// note: when adding a DEFINE_TO case here you should also add a +// toX method to IValue. These named methods are much more discoverable +// than the to templated function. + +#define DEFINE_TO(T, method_name) \ + template <> \ + inline T IValue::to()&& { \ + return static_cast(std::move(*this).method_name()); \ + } \ + template <> \ + inline c10::detail::ivalue_to_const_ref_overload_return::type IValue::to() const& { \ + typedef c10::detail::ivalue_to_const_ref_overload_return::type return_type; \ + return static_cast(this->method_name()); \ + } + +DEFINE_TO(at::Tensor, toTensor) +DEFINE_TO(at::Storage, toStorage) +DEFINE_TO(c10::Stream, toStream) +DEFINE_TO(float, toDouble) +DEFINE_TO(double, toDouble) +DEFINE_TO(c10::complex, toComplexDouble) +DEFINE_TO(unsigned char, toInt) +DEFINE_TO(signed char, toInt) +DEFINE_TO(unsigned short, toInt) +DEFINE_TO(short, toInt) +DEFINE_TO(int, toInt) +DEFINE_TO(uint32_t, toInt) +DEFINE_TO(uint64_t, toInt) +DEFINE_TO(detail::_guarded_unsigned_long, toInt) +DEFINE_TO(int64_t, toInt) +DEFINE_TO(bool, toBool) +DEFINE_TO(c10::intrusive_ptr, toBlob) +DEFINE_TO(c10::intrusive_ptr, toString) +DEFINE_TO(c10::intrusive_ptr, toObject) +DEFINE_TO(at::Scalar, toScalar) +DEFINE_TO(c10::List, toIntList) +DEFINE_TO(c10::List, toSymIntList) +DEFINE_TO(c10::List, toDoubleList) +DEFINE_TO(c10::List>, toComplexDoubleList) +DEFINE_TO(c10::List, toBoolList) +DEFINE_TO(c10::List, toTensorList) +DEFINE_TO(c10::impl::GenericList, toList) +DEFINE_TO(c10::impl::GenericDict, toGenericDict) +DEFINE_TO(c10::intrusive_ptr, toTuple) +DEFINE_TO(std::string, toStringRef) +DEFINE_TO(std::string_view, toStringView) +DEFINE_TO(c10::intrusive_ptr, toFuture) +DEFINE_TO(c10::intrusive_ptr, toAwait) +DEFINE_TO(c10::intrusive_ptr, toRRef) +DEFINE_TO(c10::intrusive_ptr, toQuantizer) +DEFINE_TO(IValue, toIValue) +DEFINE_TO(c10::Device, toDevice) +DEFINE_TO(at::ScalarType, toScalarType) +DEFINE_TO(at::Layout, toLayout) +DEFINE_TO(at::MemoryFormat, toMemoryFormat) +DEFINE_TO(at::QScheme, toQScheme) +DEFINE_TO(at::Dimname, toDimname) +DEFINE_TO(at::Generator, toGenerator) +DEFINE_TO(c10::SymInt, toSymInt) +DEFINE_TO(c10::SymFloat, toSymFloat) +DEFINE_TO(c10::SymBool, toSymBool) + +template +struct _fake_type {}; + +// generic_to converts an IValue from a generic list or generic dict +// to a concrete list/dict type likelike List, Dict<...> or std::optional. +// Note that in the case of lists, this only works for IValue-based lists, +// i.e. not for int64_t, double, ... +// generic_to is an implementation detail of IValue::to and not +// supposed to be called directly. +// The _fake_type parameter allows us to overload +// based on the return type. +template +// TODO this is deprecated but we don't throw a warning because a lot of ops in +// native_functions.yaml still return std::vector. +// C10_DEPRECATED_MESSAGE("IValues based on std::vector are potentially slow +// and deprecated. Please use torch::List instead.") +std::vector generic_to(IValue ivalue, _fake_type> /*unused*/) { + // We need to do a deep copy of the vector because there might be other + // references to this same IValue that also use the list. We can't just + // move the elements out. + auto list = std::move(ivalue).template to>(); + std::vector result; + result.reserve(list.size()); + for (Elem v : list) { + result.push_back(std::move(v)); + } + return result; +} + +template +c10::intrusive_ptr IValue::toCustomClass() && { + static_assert( + std::is_base_of_v == true, + "toCustomClass requires that template parameter T must inherit " + "from torch::CustomClassHolder"); + auto obj = toObject(); + TORCH_CHECK( + obj->slots().size() == 1, + "Tried to cast IValue to custom class but it did " + "not contain a custom class!"); + const auto* expected_type = c10::getCustomClassType>().get(); + ivalue::checkCustomClassType(expected_type, type().get()); + auto userObj = + c10::static_intrusive_pointer_cast(obj->getSlot(0).toCapsule()); + return userObj; +} + +template +c10::intrusive_ptr IValue::toCustomClass() const& { + static_assert( + std::is_base_of_v == true, + "toCustomClass requires that template parameter T must inherit " + "from torch::CustomClassHolder"); + auto obj = toObject(); + TORCH_CHECK( + obj->slots().size() == 1, + "Tried to cast IValue to custom class but it did " + "not contain a custom class!"); + const auto* expected_type = c10::getCustomClassType>().get(); + ivalue::checkCustomClassType(expected_type, type().get()); + auto userObj = + c10::static_intrusive_pointer_cast(obj->getSlot(0).toCapsule()); + return userObj; +} + +template +T generic_to(IValue ivalue, _fake_type /*unused*/) { + using ElemType = typename std::remove_pointer::type::element_type; + return std::move(ivalue).template toCustomClass(); +} + +template +tagged_capsule generic_to(IValue ivalue, _fake_type> /*unused*/) { + return tagged_capsule{std::move(ivalue)}; +} + +template +c10::List generic_to(IValue ivalue, _fake_type> /*unused*/) { + return impl::toTypedList(std::move(ivalue).toList()); +} + +template +static T createVectorLikeFromList(const c10::detail::ListImpl* impl) { + T result; + result.reserve(impl->list.size()); + for (const auto & i : impl->list) { + result.push_back(i.to()); + } + return result; +} + +template +static std::vector createVectorFromList(const c10::detail::ListImpl* impl) { + return createVectorLikeFromList>(impl); +} + +template +std::vector createVectorFromList(const c10::List& impl) { + std::vector result; + result.reserve(impl.size()); + for (size_t i = 0, N = impl.size(); i < N; ++i) { + result.push_back(impl[i]); + } + return result; +} + +template +OptionalArray generic_to(IValue ivalue, _fake_type> /*unused*/) { + if (ivalue.isNone()) { + return {}; + } + return createVectorFromList( + std::move(ivalue).template to>() + ); +} + +namespace detail { +template +std::array generic_to_array( + IValue ivalue, + _fake_type> /*unused*/, + std::index_sequence /*unused*/) { + // We need to do a deep copy of the array because there might be other + // references to this same IValue that also use the list. We can't just + // move the elements out. + auto list = std::move(ivalue).template to>(); + TORCH_CHECK( + list.size() == sizeof...(I), + "Tried to convert a List with ", + list.size(), + " elements to a fixed-size array of size ", + sizeof...(I)); + return {list[I]...}; +} +} // namespace detail + +template +std::array generic_to( + IValue ivalue, + _fake_type> ft) { + return detail::generic_to_array(ivalue, ft, std::make_index_sequence()); +} + +template +c10::Dict generic_to( + IValue ivalue, + _fake_type> /*unused*/) { + return impl::toTypedDict(std::move(ivalue).toGenericDict()); +} + +template +C10_DEPRECATED_MESSAGE( + "IValues based on std::unordered_map are slow and deprecated. Please use c10::Dict instead.") +std::unordered_map generic_to( + IValue ivalue, + _fake_type> /*unused*/) { + std::unordered_map specialized_dict; + + for (const auto& item : std::move(ivalue).toGenericDict()) { + specialized_dict[item.key().template to()] = item.value().template to(); + } + + return specialized_dict; +} + +template +std::optional generic_to(IValue ivalue, _fake_type> /*unused*/) { + if (ivalue.isNone()) { + return std::nullopt; + } + return std::move(ivalue).template to(); +} + +namespace detail { +template +Tuple generic_to_tuple_impl( + const ivalue::TupleElements& t, + std::index_sequence /*unused*/) { + return std::make_tuple( + t[INDEX].to::type>()...); +} +} // namespace detail + +template < + typename... Args, + typename Indices = std::make_index_sequence, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t> = nullptr> +std::tuple generic_to(const IValue& ivalue, _fake_type> /*unused*/) { + const auto& vals = ivalue.toTupleRef().elements(); + TORCH_CHECK(vals.size() == sizeof...(Args)); + return detail::generic_to_tuple_impl>(vals, Indices{}); +} + +template +inline T IValue::to() && { + return generic_to(std::move(*this), _fake_type{}); +} + +template <> +inline std::optional IValue::to() && { + // In the default implementation, the IValue is destroyed with std::move. + // But if the unboxed type is std::optional we cannot destroy + // the IValue. + return generic_to(*this, _fake_type>{}); +} + +template +inline typename c10::detail::ivalue_to_const_ref_overload_return::type IValue::to() const& { + return generic_to(*this, _fake_type{}); +} + +inline c10::List IValue::toIntList() && { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toIntList() const& { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toIntVector() const { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toIntVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toSymIntList() && { + AT_ASSERT( + isSymIntList() || isIntList(), + "Expected SymIntList or IntList but got ", + tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toSymIntList() const& { + AT_ASSERT( + isSymIntList() || isIntList(), + "Expected SymIntList or IntList but got ", + tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toSymIntVector() const { + AT_ASSERT(isSymIntList() || isIntList(), "Expected SymIntList or IntList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toSymIntVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline at::DimVector IValue::toDimVector() const { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toDimVector on null intrusive_ptr IValue"); + return createVectorLikeFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toDoubleList() && { + AT_ASSERT(isDoubleList(), "Expected DoubleList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toDoubleList() const& { + AT_ASSERT(isDoubleList(), "Expected DoubleList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toDoubleVector() const { + AT_ASSERT(isDoubleList(), "Expected DoubleList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toDoubleVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List> IValue::toComplexDoubleList() && { + AT_ASSERT(isComplexDoubleList(), "Expected ComplexDoubleList but got ", tagKind()); + return c10::List>(moveToIntrusivePtr()); +} +inline c10::List> IValue::toComplexDoubleList() const& { + AT_ASSERT(isComplexDoubleList(), "Expected ComplexDoubleList but got ", tagKind()); + return c10::List>(toIntrusivePtr()); +} +inline std::vector> IValue::toComplexDoubleVector() const { + AT_ASSERT(isComplexDoubleList(), "Expected ComplexDoubleList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toComplexDoubleVector on null intrusive_ptr IValue"); + return createVectorFromList>( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toBoolList() && { + AT_ASSERT(isBoolList(), "Expected BoolList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toBoolList() const& { + AT_ASSERT(isBoolList(), "Expected BoolList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline c10::List IValue::toTensorList() && { + AT_ASSERT(isTensorList(), "Expected TensorList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toTensorList() const& { + AT_ASSERT(isTensorList(), "Expected TensorList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toTensorVector() const { + AT_ASSERT(isTensorList(), "Expected TensorList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toTensorVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List> IValue::toOptionalTensorList() && { + AT_ASSERT(isOptionalTensorList(), "Expected OptionalTensorList but got ", tagKind()); + return c10::List>(moveToIntrusivePtr()); +} +inline c10::List> IValue::toOptionalTensorList() const& { + AT_ASSERT(isOptionalTensorList(), "Expected OptionalTensorList but got ", tagKind()); + return c10::List>(toIntrusivePtr()); +} +inline std::vector> IValue::toOptionalTensorVector() const { + AT_ASSERT(isOptionalTensorList(), "Expected OptionalTensorList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toOptionalTensorVector on null intrusive_ptr IValue"); + return createVectorFromList>( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toList() && { + AT_ASSERT(isList(), "Expected GenericList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toList() const& { + AT_ASSERT(isList(), "Expected GenericList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline c10::ArrayRef IValue::toListRef() const { + AT_ASSERT(isList(), "Expected GenericList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toListRef on null intrusive_ptr IValue"); + return static_cast(payload.u.as_intrusive_ptr) + ->list; +} +inline c10::Dict IValue::toGenericDict() && { + AT_ASSERT(isGenericDict(), "Expected GenericDict but got ", tagKind()); + return c10::Dict(moveToIntrusivePtr()); +} +inline c10::Dict IValue::toGenericDict() const& { + AT_ASSERT(isGenericDict(), "Expected GenericDict but got ", tagKind()); + return c10::Dict(toIntrusivePtr()); +} +inline c10::intrusive_ptr IValue::toTuple() && { + AT_ASSERT(isTuple(), "Expected Tuple but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toTuple() const& { + AT_ASSERT(isTuple(), "Expected Tuple but got ", tagKind()); + return toIntrusivePtr(); +} +inline ivalue::Tuple& IValue::toTupleRef() const { + AT_ASSERT(isTuple(), "Expected Tuple but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toTupleRef on null intrusive_ptr IValue"); + return *static_cast( + payload.u.as_intrusive_ptr); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Tuple) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} +template < + typename... Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t>> +inline IValue::IValue(const std::tuple& t) + : IValue(std::apply(c10::ivalue::Tuple::create, t)) { +} + +template < + typename... Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t>> +inline IValue::IValue(std::tuple&& t) + : IValue(std::apply(c10::ivalue::Tuple::create, std::move(t))) { +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::String) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} +inline IValue::IValue(std::string v) + : IValue(ivalue::ConstantString::create(std::move(v))) {} + +inline IValue::IValue(c10::impl::GenericList v) + : tag(Tag::GenericList) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.impl_.release()); +} + +template > +inline IValue::IValue(c10::List&& v) : IValue(impl::toList(std::move(v))) {} +template > +inline IValue::IValue(const c10::List& v) : IValue(impl::toList(v)) {} +template > +inline IValue::IValue(at::ArrayRef v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + for (const auto& e : v) { + list.push_back(e); + } +} +template > +inline IValue::IValue(at::ArrayRef v) : IValue() { + auto vi = c10::asIntArrayRefSlowOpt(v); + if (vi.has_value()) { + // This list is entirely integers; ensure it is typed as + // an IntList so toIntList works + *this = IValue(*vi); + } else { + // This list has SymInts; type it as a SymInt + *this = IValue(impl::toList(c10::List())); + auto list = to>(); + list.reserve(v.size()); + for (const auto& e : v) { + list.push_back(e); + } + } +} +template > +inline IValue::IValue(at::OptionalArrayRef mb_v) : IValue() { + if (!mb_v.has_value()) return; + *this = IValue(*mb_v); +} +template > +inline IValue::IValue(const std::vector& v) : IValue() { + *this = IValue(at::ArrayRef(v)); +} +template > +inline IValue::IValue(std::vector&& v) : IValue() { + auto vi = c10::asIntArrayRefSlowOpt(v); + if (vi.has_value()) { + // This list is entirely integers; ensure it is typed as + // an IntList so toIntList works + *this = IValue(*vi); + } else { + // This list has SymInts; type it as a SymInt + *this = IValue(impl::toList(c10::List())); + auto list = to>(); + list.reserve(v.size()); + for (auto&& e : std::move(v)) { + list.push_back(std::move(e)); + } + } +} +template > +inline IValue::IValue(const std::vector& v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + for (const auto& e : v) { + list.push_back(e); + } +} + +template > +inline IValue::IValue(std::vector&& v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + if constexpr (std::is_same_v) { + for (auto e : v) { + list.push_back(e); + } + } else { + for (auto&& e : std::move(v)) { + list.push_back(std::move(e)); + } + } +} + +template > +inline IValue::IValue(c10::OptionalArrayRef v) : IValue() { + if (v.has_value()) { + *this = IValue(std::move(*v)); + } +} + +template +inline IValue::IValue(std::array v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + for (auto& e : v) { + list.push_back(std::move(e)); + } +} + +template > +inline IValue::IValue(c10::IListRef v) : IValue() { + constexpr bool boxed_type_constructs_ivalue = + std::is_constructible_v::boxed_type>; + // First, we try to use the boxed value. + // If we fail (either it's not in the boxed state, or its boxed type + // can not construct an IValue), we fallback to copying the list. + if (boxed_type_constructs_ivalue && v.isBoxed()) { + *this = IValue(impl::toList(v.toBoxed())); + } else { + c10::List list; + list.reserve(v.size()); + for (const auto& t : v) { + list.push_back(t); + } + *this = IValue(impl::toList(std::move(list))); + } +} + +inline IValue::IValue(c10::impl::GenericDict v) + : tag(Tag::GenericDict) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.impl_.release()); +} +template +inline IValue::IValue(c10::Dict v) + : IValue(impl::toGenericDict(std::move(v))) {} + +template +inline IValue::IValue(std::unordered_map v) + : IValue(Dict()) { + auto dict = to>(); + dict.reserve(v.size()); + for (auto& e : v) { + dict.insert(std::move(e.first), std::move(e.second)); + } +} + +template > +inline IValue::IValue(std::optional v) : IValue() { + if (v.has_value()) { + *this = IValue(std::move(*v)); + } +} + +inline IValue::IValue(std::nullopt_t /*unused*/) : IValue() {} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Object) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::PyObject) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Enum) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue IValue::make_capsule( + intrusive_ptr blob) { + IValue iv; + iv.tag = Tag::Capsule; + iv.payload.u.as_intrusive_ptr = null_to_undefined_tensor(blob.release()); + return iv; +} + +template < + typename T, + std::enable_if_t, int>> +IValue::IValue(c10::intrusive_ptr custom_class) : tag(Tag::Object) { + auto classType = []() { + try { + return c10::getCustomClassType>(); + } catch (const c10::Error&) { + throw c10::Error( + "Trying to instantiate a class that isn't a registered custom class: " + + std::string(c10::util::get_fully_qualified_type_name())); + } + }(); + auto ivalue_obj = c10::ivalue::Object::create(std::move(classType), /* numSlots */1); + ivalue_obj->setSlot(0, IValue::make_capsule(std::move(custom_class))); + payload.u.as_intrusive_ptr = null_to_undefined_tensor(ivalue_obj.release()); + +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Future) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Await) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::RRef) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Quantizer) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +template +inline IValue::IValue(c10::complex c) + : tag(Tag::ComplexDouble) { + auto v = c10::make_intrusive(c); + payload.u.as_intrusive_ptr = v.release(); +} + +inline const std::string& IValue::toStringRef() const { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toStringRef on null intrusive_ptr IValue"); + return static_cast( + payload.u.as_intrusive_ptr) + ->string(); +} +inline std::optional> IValue:: + toOptionalStringRef() const { + if (isNone()) { + return std::nullopt; + } + AT_ASSERT(isString(), "Expected std::optional but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toOptionalStringRef on null intrusive_ptr IValue"); + return std::reference_wrapper( + static_cast(payload.u.as_intrusive_ptr) + ->string()); +} + +inline std::string_view IValue::toStringView() const { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toStringView on null intrusive_ptr IValue"); + return static_cast( + payload.u.as_intrusive_ptr) + ->string_view(); +} + +inline PyObject* IValue::toPyObject() const { + return toPyObjectHolder()->getPyObject(); +} + +template +inline std::optional IValue::toOptional() { + if (this->isNone()) { + return std::nullopt; + } + return this->to(); +} + +template +inline std::optional IValue::toOptional() const { + if (this->isNone()) { + return std::nullopt; + } + return this->to(); +} + +inline bool IValue::isCustomClass() const { + return torch::isCustomClass(*this); +} + +inline bool IValue::isSameIdentity(const IValue& rhs) const { + // We choose to not use memcmp for payload check due to potential random + // padding characters on union type + + // Semantics: + // 1. Immutable primitive values of the same type (Int, Double, None, Bool, + // Str) return value equality + // 2. If it is a tensor type, we need to take undefined tensor into account + // 3. Undefined_tensor is None and vice versa should be true + // 4. If it is a reference type (i.e. isIntrusivePtr()), then is True when + // the pointed-to object is the same. + // 5. False for all other comparisons. + if (this->isNone() && rhs.isNone()) { + return true; + } else if (this->isBool() && rhs.isBool()) { + // for bool type, do equality check + return this->toBool() == rhs.toBool(); + } else if (this->isTensor() && rhs.isTensor()) { + return this->payload.as_tensor.is_same(rhs.payload.as_tensor); + } else if (this->isTensor() && rhs.isNone()) { + // special case: undefined tensor and None are the same identity + return !this->payload.as_tensor.defined(); + } else if (this->isNone() && rhs.isTensor()) { + // special case: undefined tensor and None are the same identity + return !rhs.payload.as_tensor.defined(); + } else if (this->isInt() && rhs.isInt()) { + return this->toInt() == rhs.toInt(); + } else if (this->isDouble() && rhs.isDouble()) { + return this->toDouble() == rhs.toDouble(); + } else if (this->isString() && rhs.isString()) { + return this->toStringRef() == rhs.toStringRef(); + } else { + // for objects holding in IValue, do shallow compare on pointer address to + // testify the identity + return this->isIntrusivePtr() && rhs.isIntrusivePtr() && + this->payload.u.as_intrusive_ptr == rhs.payload.u.as_intrusive_ptr; + } +} + +namespace ivalue { +namespace detail { + +template +IValue from_(T&& x, std::true_type /*unused*/) { + return IValue(std::forward(x)); +} +template +IValue from_(c10::intrusive_ptr x, std::false_type /*unused*/) { + return IValue(std::move(x)); +} +template +IValue from_(T&& /*x*/, std::false_type /*unused*/) { + static_assert( + guts::false_t::value, + "You are calling from with a type that it doesn't support, and isn't a potential custom class (ie: is an intrusive_ptr)"); + return IValue(); +} +} // namespace detail + +template +IValue from(T&& x) { + return detail::from_( + std::forward(x), typename std::is_constructible::type{}); +} + +} // namespace ivalue + + +template <> +struct MaybeOwnedTraits { + using owned_type = IValue; + using borrow_type = IValue; + + static borrow_type createBorrow(const owned_type& from) { + if (!from.isPtrType()) { + return from; + } + if (from.isTensor()) { + return IValue(MaybeOwnedTraits::createBorrow(from.toTensor())); + } else { + return IValue(from.payload, from.tag); + } + } + + static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) { + lhs.clearToNone(); + if (!rhs.isPtrType()) { + lhs = rhs; + } else if (rhs.isTensor()) { + lhs = IValue(MaybeOwnedTraits::createBorrow(rhs.toTensor())); + } else { + lhs = IValue(rhs.payload, rhs.tag); + } + } + + static void destroyBorrow(borrow_type& toDestroy) { + toDestroy.clearToNone(); + } + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return &borrow; + } + + static bool debugBorrowIsValid(const borrow_type& /*unused*/) { + return true; + } +}; + +template <> +struct IValue::TagType { + static TORCH_API c10::TypePtr get(const IValue& /*v*/); +}; + +template <> +struct IValue::TagType { + static TORCH_API c10::TypePtr get(const IValue& /*v*/); +}; + +template +TypePtr IValue::type() const { + return IValue::TagType::get(*this); +} + +} // namespace c10 + +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_to.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_to.h new file mode 100644 index 0000000000000000000000000000000000000000..52a3f23bbb0475948e101e1d368d7952888f8528 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_to.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace at { +class Tensor; +} // namespace at + +namespace c10 { +struct IValue; +namespace detail { +// Determine the return type of `IValue::to() const &`. It's a const +// reference when possible and a copy otherwise. It is in this +// separate header so that List can use it as well. +template +struct ivalue_to_const_ref_overload_return { + using type = T; +}; + +template<> +struct ivalue_to_const_ref_overload_return { + using type = const at::Tensor&; +}; + +template<> +struct ivalue_to_const_ref_overload_return { + using type = const std::string&; +}; + +template<> +struct ivalue_to_const_ref_overload_return { + using type = const IValue&; +}; + +} // namespace detail +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/jit_type.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/jit_type.h new file mode 100644 index 0000000000000000000000000000000000000000..bec7f2e5823177274af77717db8f59090914c676 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/jit_type.h @@ -0,0 +1,2435 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +namespace torch::jit { +struct Function; +} // namespace torch::jit + + +namespace c10 { + +template +class Dict; +struct IValue; +struct FunctionSchema; +struct NamedType; +using OptNameList = std::optional>; + +void standardizeVectorForUnion(std::vector& reference, std::vector* to_fill); +void standardizeVectorForUnion(std::vector* to_flatten); + +inline bool is_contiguous_strides( + const IntArrayRef sizes, + const IntArrayRef strides) { + size_t n_dim = sizes.size(); + if (n_dim == 0) { + return true; + } + + if (strides[n_dim - 1] != 1) { + return false; + } + + for (int i = static_cast(n_dim) - 2; i >= 0; i--) { + if (strides[i] != strides[i + 1] * sizes[i + 1]) { + return false; + } + } + return true; +} + +struct AnyType; +using AnyTypePtr = SingletonTypePtr; +// Any is the top of the type hierarchy, all other types are subtypes +// T <: Any, forall T +struct TORCH_API AnyType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Any"; + } + static const TypeKind Kind = TypeKind::AnyType; + // global singleton + static AnyTypePtr get(); + + private: + AnyType() : Type(TypeKind::AnyType) {} +}; + +inline std::string toString(const Type& type) { + return type.str(); +} + +// Shim for compatibility with code that uses TypePtr. +inline std::string toString(const TypePtr& typePtr) { + return toString(*typePtr); +} + +inline bool operator!=(const Type& lhs, const Type& rhs) { + return !(lhs == rhs); +} + +// common base for all types that have a single sub element +// e.g. Future[T], Optional[T], List[T] +template +struct SingleElementType : public SharedType { + static const TypeKind Kind = K; + + const TypePtr& getElementType() const { + return elem; + } + + bool hasFreeVariables() const override { + return getElementType()->hasFreeVariables(); + } + + at::ArrayRef containedTypes() const override { + return elem; + } + + bool equals(const Type& rhs) const override { + if (auto rhs_ = rhs.cast()) { + return *getElementType() == *rhs_->getElementType(); + } + return false; + } + + protected: + SingleElementType(TypePtr elem) : SharedType(Kind), elem(std::move(elem)) { + TORCH_CHECK(this->elem, c10::str( + "Can not create ", typeKindToString(Kind), " with None type")); + } + + private: + TypePtr elem; +}; + +struct UnionType; +using UnionTypePtr = std::shared_ptr; +struct TORCH_API UnionType : public SharedType { + friend struct Type; + + static const TypeKind Kind = TypeKind::UnionType; + + bool isSubtypeOfExt(const Type& rhs_, std::ostream* why_not) const override; + + std::string str() const override; + + static UnionTypePtr create(std::vector reference); + + bool equals(const Type& rhs) const override; + + bool isUnionType() const override { + return true; + } + + at::ArrayRef containedTypes() const override { + return types_; + } + + // For testing purposes only + at::ArrayRef getTypes() const { + return types_; + } + + TypePtr createWithContained(std::vector contained_types) const override { + return create(std::move(contained_types)); + } + + bool canHoldType(const Type& type) const; + + bool hasFreeVariables() const override { + return has_free_variables_; + } + + std::optional toOptional() const; + + std::optional subtractTypeSet(std::vector& to_subtract) const; + + protected: + explicit UnionType(std::vector types, TypeKind kind=TypeKind::UnionType); + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override; + std::string unionStr( + const TypePrinter& printer = nullptr, + bool is_annotation_str = false) const; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool has_free_variables_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector types_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool can_hold_none_; + +}; + +struct OptionalType; +using OptionalTypePtr = std::shared_ptr; +// This type represents an optional type. There is one `Optional` for +// each element type. `Optional[T]` can accept both `T` and +// `None`(`std::nullopt` in C++) +// Subtype hierarchy for Optional: +// - Optional[T] <: Optional[R] iff T <: R +// - T <: Optional[R] if T <: R +// - None <: Optional[T] for all T +// - Optional[T] == Union[T, None] for all T +struct TORCH_API OptionalType : public UnionType { + static OptionalTypePtr create(const TypePtr& contained); + + static const TypeKind Kind = TypeKind::OptionalType; + + friend struct Type; + + bool equals(const Type& rhs) const override; + + const TypePtr& getElementType() const { + return contained_; + } + + at::ArrayRef containedTypes() const override { + return contained_; + } + + std::string str() const override { + std::stringstream ss; + ss << getElementType()->str() << '?'; + return ss.str(); + } + + TypePtr createWithContained( + std::vector contained_types) const override { + AT_ASSERT(contained_types.size() == 1); + return create(contained_types[0]); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + bool isUnionType() const override { + return true; + } + + // common cast Optional[Tensor] for undefined tensor type + static TypePtr ofTensor(); + // + // global singleton + static TypePtr get(TypePtr inner); + + private: + explicit OptionalType(const TypePtr& contained); + + TypePtr contained_; + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "Optional[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +template +inline std::optional merge_primitive( + const std::optional& a, + const std::optional& b) { + if (a.has_value() && b.has_value() && a.value() == b.value()) { + return a; + } + return std::optional{}; +} + +// If we see `a + b + c` and know that a, b, and c are the same size and have +// two dimensions (WxH), then we can generate a fused kernel for them. That +// fused kernel would likely have indexing math to handling both the W and H +// dimensions. However, if we knew the WxH dimensions were contiguous, we can +// pretend like we only have a single dimension, simplifying the indexing logic. +// This can be performed even if the dimensions are transposed, +// as long as a, b, and c are transposed in the same way. +// We'd like to have the compiler be able to do this dimensionality reduction, +// but simply knowing sizes is not enough. +// We can extend profiling to also record stride information. +// Rather than recording specific strides, +// we can simply order the strides from smallest to largest with +// `stride_indices` A contiguity marker on the smallest stride (c0) indicates +// the stride is precisely 1, otherwise a contiguity marker means that $stride_n +// = size_{n-1}*stride_{n-1}$ +struct TORCH_API Stride { + Stride() = default; + Stride( + const std::optional& stride_index, + std::optional contiguous, + const std::optional& stride) + : stride_index_(stride_index), contiguous_(contiguous), stride_(stride) {} + + bool operator==(const Stride& b) const { + return stride_index_ == b.stride_index_ && contiguous_ == b.contiguous_ && + stride_ == b.stride_; + } + + bool isComplete() const { + return stride_index_ && contiguous_ && stride_; + } + + std::optional stride_index_; + std::optional contiguous_; + std::optional stride_; +}; + +template <> +inline std::optional merge_primitive( + const std::optional& a, + const std::optional& b) { + std::optional left = a; + std::optional right = b; + if (!left.has_value()) { + left = {Stride()}; + } + if (!right.has_value()) { + right = {Stride()}; + } + + auto merged_index = + merge_primitive(left->stride_index_, right->stride_index_); + auto merged_cont = merge_primitive(left->contiguous_, right->contiguous_); + auto merged_stride = merge_primitive(left->stride_, right->stride_); + auto r = Stride(merged_index, merged_cont, merged_stride); + // normalize + if (!r.stride_index_.has_value() && !r.contiguous_.has_value() && + !r.stride_.has_value()) { + return std::optional{}; + } + + return r; +} + +struct TORCH_API ShapeSymbol { + // needed for use in `std::map` + ShapeSymbol() : value_(-1) {} + // is this symbol a fixed/static dimension + bool is_static() const { + return value_ >= 0; + } + bool operator==(const ShapeSymbol& b) const { + return value_ == b.value_; + } + bool operator<(const ShapeSymbol& b) const { + return value_ < b.value_; + } + + static ShapeSymbol fromStaticSize(int64_t val) { + return ShapeSymbol(val); + } + int64_t static_size() const { + TORCH_CHECK(is_static()); + return value_; + } + + int64_t value() const { + return value_; + } + + static ShapeSymbol newSymbol() { + return fromStaticSize(-static_cast(++num_symbols)); + } + friend TORCH_API std::ostream& operator<<( + std::ostream& os, + const ShapeSymbol& s); + + private: + ShapeSymbol(int64_t val) : value_(val) {} + int64_t value_; + static std::atomic num_symbols; +}; + +inline ShapeSymbol merge_primitive( + const ShapeSymbol& a, + const ShapeSymbol& b) { + if (a.is_static() && b.is_static() && a == b) { + return a; + } + return ShapeSymbol::newSymbol(); +} + +// Shape of a Tensor represented with ShapeSymbol's. Unranked, ranked unknown +// dims, partially known and fully known shapes are all supported. +struct TORCH_API SymbolicShape { + // Unranked shape constructor. + SymbolicShape() : dims_(std::nullopt) {} + + // Known rank but unknown dimensions. + SymbolicShape(std::optional rank) : dims_(std::nullopt) { + if(!rank) { + return; + } + + std::vector shape_symbols; + shape_symbols.reserve(*rank); + for(size_t i = 0; i < *rank; ++i) { + shape_symbols.push_back(ShapeSymbol::newSymbol()); + } + dims_ = shape_symbols; + } + + // Mix of known and unknown ranks + SymbolicShape(const std::vector>& dims) { + std::vector shape_symbols; + shape_symbols.reserve(dims.size()); + for(std::optional dim: dims) { + if(!dim) { + shape_symbols.push_back(ShapeSymbol::newSymbol()); + } else { + shape_symbols.push_back(ShapeSymbol::fromStaticSize(*dim)); + } + } + dims_ = shape_symbols; + } + + void dump() const; + + SymbolicShape(std::vector dims) : dims_(std::move(dims)) {} + + SymbolicShape(c10::IntArrayRef dims) { + std::vector shape_symbols; + shape_symbols.reserve(dims.size()); + for(int64_t dim : dims) { + shape_symbols.push_back(ShapeSymbol::fromStaticSize(dim)); + } + dims_ = shape_symbols; + } + + ShapeSymbol operator[](size_t i) const { + TORCH_CHECK(dims_, "Rank isn't fixed"); + return (*dims_).at(i); + } + + ShapeSymbol at(size_t i) const { + TORCH_CHECK(dims_, "Rank isn't fixed"); + return (*dims_).at(i); + } + + // Returns rank or nullopt in case of unranked shape. + std::optional rank() const { + if(!dims_) { + return std::nullopt; + } + return dims_->size(); + } + + std::optional> sizes() const { + return dims_; + } + + std::optional> symbolicDims() const { + if (!dims_) { + return std::nullopt; + } + auto symbolic_dims = std::vector(); + for (const ShapeSymbol& s : *dims_) { + symbolic_dims.push_back(!s.is_static()); + } + return symbolic_dims; + } + + // Checks whether the shape is fully defined/complete, ie. rank and sizes + // of every dimension are known. + bool isComplete() const { + if(!dims_) { + return false; + } + for(auto d : *dims_) { + if(!d.is_static()) { + return false; + } + } + return true; + } + + // Create new SymbolicShape that is result of merging self and another + // SymbolicShape. Only dimensions that are static and equal will be + // preserved. + // If either of two shapes are of unknown rank or they have unmatching rank, + // result will be unranked. + SymbolicShape merge(const SymbolicShape& other) const; + + friend bool operator==(const SymbolicShape& lhs, const SymbolicShape& rhs) { + return lhs.dims_ == rhs.dims_; + } + + friend bool operator!=(const SymbolicShape& lhs, const SymbolicShape& rhs) { + return !(lhs == rhs); + } + + private: + std::optional> dims_; +}; + +namespace detail { +inline bool isComplete(const Stride& s) { + return s.isComplete(); +} + +template +inline bool isComplete(const T& /*t*/) { + return true; +} +} + +template +struct VaryingShape { + using ListOfOptionalElements = std::vector>; + VaryingShape(const std::vector& vec) + : VaryingShape(ListOfOptionalElements(vec.begin(), vec.end())) {} + + VaryingShape(c10::ArrayRef vec) + : VaryingShape(ListOfOptionalElements(vec.begin(), vec.end())) {} + + VaryingShape(std::optional size = std::nullopt) : dims_(std::nullopt) { + if (size) { + dims_ = ListOfOptionalElements(*size); + } + } + + VaryingShape(ListOfOptionalElements dims) : dims_(std::move(dims)) {} + + VaryingShape(size_t size) : VaryingShape(std::optional(size)) {} + + bool operator==(const VaryingShape& other) const { + return dims_ == other.dims_; + } + + const std::optional &operator[](size_t i) const { + TORCH_CHECK(dims_, "Rank isn't fixed"); + return (*dims_).at(i); + } + + std::optional size() const { + if (!dims_) { + return std::nullopt; + } + const auto& dims = dims_.value(); + return dims.size(); + } + + const std::optional& sizes() const { + return dims_; + } + + TORCH_API VaryingShape merge(const VaryingShape& other) const; + + std::optional> concrete_sizes() const { + if (!dims_) { + return std::nullopt; + } + std::vector sizes; + sizes.reserve(dims_.value().size()); + for (auto d : *dims_) { + if (!d) { + return std::nullopt; + } + sizes.push_back(d.value()); + } + return sizes; + } + + bool isComplete() const { + if (!dims_) { + return false; + } + for (auto d : *dims_) { + if (!d || !detail::isComplete(*d)) { + return false; + } + } + return true; + } + + private: + std::optional dims_; +}; + +struct TensorType; +// TODO: investigate making this SingletonOrSharedTypePtr +using TensorTypePtr = std::shared_ptr; +// This type represents a single Tensor with a specific size +struct TORCH_API TensorType : public SharedType { + static TensorTypePtr create(const at::Tensor& t); + + // used by TensorType::create(size_t dim) which in turn used by + // shape_analysis.cpp + static TensorTypePtr create( + std::optional scalar_type, + std::optional device, + const VaryingShape& sizes, + const VaryingShape& strides, + std::optional requires_grad, + std::optional undefined = false, + bool tensor_contiguity = false); + + static TensorTypePtr create( + std::optional scalar_type, + std::optional device, + SymbolicShape sizes, + VaryingShape stride_, + std::optional requires_grad, + std::optional undefined = false); + + static TensorTypePtr create( + std::optional scalar_type, + std::optional device, + std::optional dim, + std::optional requires_grad); + + // overloaded create variadic template argument as it could not distinguish + // initializer list + static TensorTypePtr createContiguous( + at::ScalarType scalar_type, + at::Device device, + at::IntArrayRef sizes); + + static TypePtr fromNumberType(const Type& typ); + static TypePtr fromBoolType(); + + std::optional dim() const { + return sizes().size(); + } + + VaryingShape sizes() const; + + VaryingShape strides() const; + + const VaryingShape& stride_properties() const { + return strides_; + } + + const std::optional& device() const { + return device_; + } + const std::optional& scalarType() const { + return scalar_type_; + } + const std::optional& requiresGrad() const { + return requires_grad_; + } + bool requires_grad() const override { + return requires_grad_ ? *requires_grad_ : true; + } + + bool equals(const Type& rhs) const override; + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + std::string str() const override; + + std::string repr_str() const override { + if (isInferredType()) { + return str() + " (inferred)"; + } else { + return str(); + } + } + + std::optional numel() const { + size_t prod = 1; + const auto& shape = sizes(); + + for (size_t i = 0; i < shape.size(); i++) { + auto const &s = shape[i]; + if (!s.has_value()) { + return std::optional{}; + } + prod *= s.value(); + } + return prod; + } + + TensorTypePtr withRequiresGrad(std::optional s) { + auto copy = clone(); + copy->requires_grad_ = s; + return copy; + } + + TensorTypePtr withScalarType(std::optional st) { + auto copy = clone(); + copy->scalar_type_ = st; + return copy; + } + + TensorTypePtr withDim(std::optional d) { + auto copy = clone(); + // withDim is only used by the legacy executor + // that only cares about the rank, so create dummy symbols)) : + copy->sizes_ = SymbolicShape(d); + copy->strides_ = VaryingShape(d); + return copy; + } + + TensorTypePtr withStrides(VaryingShape sstrides) const { + auto cloned = clone(); + cloned->strides_ = std::move(sstrides); + return cloned; + } + + TensorTypePtr withSizesStrides( + at::IntArrayRef sizes, + at::IntArrayRef strides) const { + auto cloned = clone(); + auto ssizes = SymbolicShape(sizes); + cloned->sizes_ = ssizes; + cloned->strides_ = computeStrideProps(sizes, strides); + return cloned; + } + + TensorTypePtr withSymbolicShapes(SymbolicShape ssizes) const { + auto cloned = clone(); + cloned->sizes_ = std::move(ssizes); + return cloned; + } + + TensorTypePtr withSizes(at::IntArrayRef sizes) const { + return withSizesStrides( + sizes, contiguousStridesOf(sizes)); + } + + TensorTypePtr withDevice(const std::optional device) const { + auto copy = clone(); + copy->device_ = device; + return copy; + } + + TensorTypePtr dimensionedOnly() const { + auto copy = clone(); + copy->sizes_ = SymbolicShape(sizes().size()); + copy->strides_ = VaryingShape(sizes().size()); + return copy; + } + + TensorTypePtr contiguous() const { + auto cloned = clone(); + auto concrete_sizes = sizes().concrete_sizes(); + TORCH_INTERNAL_ASSERT(concrete_sizes.has_value()); + auto strides = computeStrideProps( + *concrete_sizes, + contiguousStridesOf(*concrete_sizes)); + cloned->strides_ = strides; + return cloned; + } + + const SymbolicShape& symbolic_sizes() const; + + TensorTypePtr merge(const TensorType& other, bool merge_sizes = true) const; + + bool matchTensor(const at::Tensor& t); + + // is all information about the type specified except for autograd? + // This replaces the notion of a 'CompleteTensorType' that used to exist + // in the type-hierarchy. Excluding require_grad and undefined allows + // this to match the old behavior. + bool isComplete() const { + return scalar_type_ && device_ && sizes_.isComplete() && strides_.isComplete(); + } + + bool isInferredType() const { + return is_inferred_; + } + + static TensorTypePtr getInferred() { + static auto valueInferred = TensorType::create( + /*scalar_type=*/{}, + /*device=*/{}, + /*sizes=*/SymbolicShape(), + /*stride=*/VaryingShape{}, + /*requires_grad=*/{}, + /*undefined=*/false); + valueInferred->is_inferred_ = true; + return valueInferred; + } + + // this property is used by GuardElimination + // please see `checkInputs` for more details + bool isSummarized() const { + return !(isComplete() && requiresGrad().has_value() && + undefined().has_value()); + } + + TensorTypePtr withUndefined() { + auto r = clone(); + r->undefined_ = true; + return r; + } + + TensorTypePtr withPossiblyUndefined() { + auto r = clone(); + r->undefined_ = std::nullopt; + return r; + } + + std::optional undefined() const { return undefined_; } + + static const TensorTypePtr& get(); + + static const TypeKind Kind = TypeKind::TensorType; + + static std::vector contiguousStridesOf( + at::IntArrayRef in_sizes, + at::MemoryFormat memory_format = MemoryFormat::Contiguous) { + auto contiguous_fn = [](const at::IntArrayRef& sizes, + const std::vector& dim_order) { + std::vector strides(sizes.size()); + if (sizes.empty()) // zero-dim case + return strides; + + strides[dim_order[0]] = 1; + for (size_t i = 1; i < dim_order.size(); i++) { + auto cur_dim = dim_order[i]; + auto pre_dim = dim_order[i - 1]; + strides[cur_dim] = strides[pre_dim] * sizes[pre_dim]; + } + return strides; + }; + + std::vector dim_order(in_sizes.size()); + if (memory_format == MemoryFormat::ChannelsLast) { + dim_order = {1, 3, 2, 0}; + } else if (memory_format == MemoryFormat::ChannelsLast3d) { + dim_order = {1, 4, 3, 2, 0}; + } else { + auto ndims = in_sizes.size(); + for (size_t i = 0; i < ndims; i++) { + dim_order[i] = static_cast(ndims - i - 1); // Reverse + } + } + return contiguous_fn(in_sizes, dim_order); + } + + private: + TensorType( + std::optional scalar_type, + std::optional device, + SymbolicShape sizes, + VaryingShape strides, + std::optional requires_grad, + std::optional undefined = false); + + TensorTypePtr clone() const { + return TensorTypePtr(new TensorType( + scalar_type_, device_, sizes_, strides_, requires_grad_, undefined_)); + } + + static VaryingShape computeStrideProps( + at::IntArrayRef sizes, + at::IntArrayRef strides, + bool tensor_contiguity = false); + + std::optional scalar_type_; + std::optional device_; + SymbolicShape sizes_; + VaryingShape strides_; + std::optional requires_grad_; + // we exploit the fact certain tensors must be zero in the autograd to + // optimize gradient computation. Such zero tensors are currently implemented + // with `UndefinedTensorImpl.` They can be handled only by special operators + // (e.g. `AutogradAdd`) and their `Tensor::defined()` property returns false. + // Normally, `undefined_` is set to false, unless a type was created + // with `withUndefined` + // This will also mean that `undefined` tensors will fail + // `subtypeOf(TensorType::get())` check + // undefined_ may become `std::nullopt` if the tensor was observed to be both + // defined and undefined. However, no tensor type starts out with + // `undefined_` set to `std::nullopt` + std::optional undefined_; + // Represents whether or not this type was inferred. + bool is_inferred_ = false; +}; + +struct ListType; +using ListTypePtr = std::shared_ptr; +struct TORCH_API ListType + : public SingleElementType { + // It's not exactly a singleton, but there should be exactly one instance of + // List[T] for every T + friend struct Type; + template + static ListTypePtr create(T&&... all) { + return ListTypePtr( + new ListType(std::forward(all)...)); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + std::stringstream ss; + ss << getElementType()->str() << "[]"; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + // global singleton + // Given an inner type T and an identifier, + // this function will return the global singleton type pointer + // the type List. + // The extra "identifier" argument is needed because we have multiple container types + // that all reuse this function (List, array, etc.) + static TypePtr get(const std::string& identifier, TypePtr inner); + + // common cast List[Tensor] + static ListTypePtr ofTensors(); + static ListTypePtr ofOptionalTensors(); + static ListTypePtr ofInts(); + static ListTypePtr ofSymInts(); + static ListTypePtr ofFloats(); + static ListTypePtr ofComplexDoubles(); + static ListTypePtr ofBools(); + static ListTypePtr ofStrings(); + static ListTypePtr ofNumbers(); + + private: + ListType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "List[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +struct DictType; +using DictTypePtr = std::shared_ptr; +struct TORCH_API DictType : public SharedType { + friend struct Type; + static const TypeKind Kind = TypeKind::DictType; + + static DictTypePtr create(TypePtr key, TypePtr value) { + auto kind = key->kind(); + if (auto dyn = key->castRaw()) { + kind = dyn->dynamicKind(); + } + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") + switch (kind) { + case TypeKind::AnyType: + case TypeKind::IntType: + case TypeKind::BoolType: + case TypeKind::FloatType: + case TypeKind::ComplexType: + case TypeKind::StringType: + case TypeKind::TensorType: + case TypeKind::DeviceObjType: + return DictTypePtr(new DictType(std::move(key), std::move(value))); + default: + TORCH_CHECK(false, + "Cannot create dict for key type '", + key->str(), + "', only int, float, complex, Tensor, device and string keys are supported"); + } + C10_DIAGNOSTIC_POP() + } + + // aligned with the format in FunctionSchema + std::string str() const override { + std::stringstream ss; + ss << "Dict(" << getKeyType()->str() << ", " << getValueType()->str() + << ')'; + return ss.str(); + } + + TypePtr createWithContained( + std::vector contained_types) const override { + TORCH_CHECK(contained_types.size() == 2, "Expected 2 contained types"); + return create(std::move(contained_types.at(0)), std::move(contained_types.at(1))); + } + + const TypePtr& getKeyType() const { + return types.at(0); + } + + const TypePtr& getValueType() const { + return types.at(1); + } + + bool hasFreeVariables() const override { + return has_free_variables; + } + + at::ArrayRef containedTypes() const override { + return types; + } + + bool equals(const Type& rhs) const override { + if (auto* dict_rhs = rhs.castRaw()) { + return *getKeyType() == *(dict_rhs->getKeyType()) && + *getValueType() == *(dict_rhs->getValueType()); + } + return false; + } + + // global singleton + // Given an inner type T and an identifier, + // this function will return the global singleton type pointer + // the type List. + // The extra "identifier" argument is needed because we have multiple container types + // that all reuse this function (Dict and unordered_map) + static TypePtr get(const std::string& identifier, TypePtr key, TypePtr val); + + private: + DictType(TypePtr key, TypePtr value) + : SharedType(TypeKind::DictType), + has_free_variables( + key->hasFreeVariables() || value->hasFreeVariables()) { + types.reserve(2); + types.push_back(std::move(key)); + types.push_back(std::move(value)); + } + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override; + + std::vector types; + bool has_free_variables; +}; + +struct FutureType; +using FutureTypePtr = std::shared_ptr; + +struct TORCH_API FutureType + : public SingleElementType { + friend struct Type; + template + static FutureTypePtr create(TypePtr elem) { + return FutureTypePtr( + new FutureType(std::move(elem))); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + std::stringstream ss; + ss << "Future(" << getElementType()->str() << ')'; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + if (Type::isSubtypeOfExt(rhs, why_not)) { + return true; + } + if (auto rhs_ = rhs.castRaw()) { + return getElementType()->isSubtypeOfExt(*rhs_->getElementType(), why_not); + } + return false; + } + + private: + FutureType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "Future[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +struct AwaitType; +using AwaitTypePtr = std::shared_ptr; + +struct TORCH_API AwaitType + : public SingleElementType { + friend struct Type; + template + static AwaitTypePtr create(TypePtr elem) { + return AwaitTypePtr( + new AwaitType(std::move(elem))); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + std::stringstream ss; + ss << "Await(" << getElementType()->str() << ')'; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + if (Type::isSubtypeOfExt(rhs, why_not)) { + return true; + } + if (auto rhs_ = rhs.castRaw()) { + return getElementType()->isSubtypeOfExt(*rhs_->getElementType(), why_not); + } + return false; + } + + private: + AwaitType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "Await[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +struct RRefType; +using RRefTypePtr = std::shared_ptr; + +struct TORCH_API RRefType + : public SingleElementType { + friend struct Type; + template + static RRefTypePtr create(TypePtr elem) { + return RRefTypePtr( + new RRefType(std::move(elem))); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + std::stringstream ss; + ss << "RRef(" << getElementType()->str() << ')'; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + private: + RRefType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "RRef[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +// Any should never appear in a named type like a class, namedtuple or +// interface. If it does, then dynamic type information will be lost in the +// Pickler, leading to hard-to-track-down bugs that will only occur +// after saving or loading a model. This is because we rely on the +// static types in named types to reconstruct type tags of loaded +// values. Lifting this restriction requires solving the serialization +// problem first. +TORCH_API void checkNoAny( + const Type& base, + const char* what, + const std::string& attrname, + const TypePtr& attrtype); + +struct TupleType; +using TupleTypePtr = std::shared_ptr; +using NameList = std::vector; +// This type represents a Tuple +struct TORCH_API TupleType : public NamedType { + + static TupleTypePtr createNamed(const std::optional& name, + const std::vector& field_names, + const std::vector& field_types, + std::vector& field_defaults); + + static TupleTypePtr createNamed(const std::optional& name, + const std::vector& field_names, + const std::vector& field_types); + + static TupleTypePtr createNamed(const std::optional& name, + const std::vector& field_names, + const std::vector& field_types); + + static TupleTypePtr create( + std::vector types) { + return TupleTypePtr(new TupleType( + std::move(types), + std::nullopt, + nullptr)); // NOLINT(modernize-make-shared) + } + static TupleTypePtr create() { + return create({}); + } + + at::ArrayRef elements() const { + return elements_; + } + + bool equals(const Type& rhs) const override; + bool isSubtypeOfExt(const Type& rhs_, std::ostream* why_not) const override; + + std::string str() const override; + bool hasFreeVariables() const override { + return has_free_variables_; + } + at::ArrayRef containedTypes() const override { + return elements_; + } + TypePtr createWithContained( + std::vector contained_types) const override { + return std::shared_ptr( + new TupleType(std::move(contained_types), name(), schema())); + } + const std::shared_ptr& schema() const { + return schema_; + } + std::optional> names() const; + + static const TypeKind Kind = TypeKind::TupleType; + + private: + template + static TupleTypePtr createWithSpec( + const std::optional& name, + const std::vector& field_names, + const std::vector& field_types, + std::vector& field_defaults); + + TupleType( + std::vector elements_, + std::optional name, + std::shared_ptr schema); + + bool compare( + const Type& rhs, + const std::function& fn) const { + if (rhs.kind() != kind()) { + return false; + } + + const auto& l_elements = elements(); + const auto& r_elements = rhs.castRaw()->elements(); + if (l_elements.size() != r_elements.size()) + return false; + for (size_t i = 0; i < l_elements.size(); ++i) { + if (!fn(*l_elements[i], *r_elements[i])) + return false; + } + return true; + } + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override; + + std::vector elements_; + bool has_free_variables_; + std::shared_ptr schema_; +}; + +// the common supertype of all Enums, only used in operator registration. +// EnumType <: AnyEnumType for all Enums +struct AnyEnumType; +using AnyEnumTypePtr = SingletonTypePtr; +struct TORCH_API AnyEnumType final : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "AnyEnumType"; + } + static const TypeKind Kind = TypeKind::AnyEnumType; + // global singleton + static AnyEnumTypePtr get(); +private: + AnyEnumType() + : Type(TypeKind::AnyEnumType) {} +}; + +struct NumberType; +using NumberTypePtr = SingletonTypePtr; +// This type represents a Python number +// Subtype hierarchy for Number Types (NumberType as the base type): +// IntType <: NumberType +// FloatType <: NumberType +// ComplexType <:NumberType +// +// WARNING: if you add a new subtype of NumberType that is not +// represented by a global singleton, you need to change NumberTypePtr +// to a SingletonOrSharedTypePtr and deal with NumberType needing to +// both inherit and not inherit from SharedType! +struct TORCH_API NumberType : public Type { + bool equals(const Type& rhs) const override; + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + std::string str() const override { + return "Scalar"; // match what PythonArgParser says for clarity + } + static const TypeKind Kind = TypeKind::NumberType; + // global singleton + static NumberTypePtr get(); + + protected: + NumberType(TypeKind kind = TypeKind::NumberType) : Type(kind) {} + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "number"; // technically not a valid python type, but + // we need to use it when parsing back in annotations + // for implicit conversions + } +}; + +struct FloatType; +using FloatTypePtr = SingletonTypePtr; +// This type represents a Python float number +struct TORCH_API FloatType : public NumberType { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "float"; + } + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + // NOLINTNEXTLINE(bugprone-parent-virtual-call) + return rhs.kind() == TypeKind::NumberType || Type::isSubtypeOfExt(rhs, why_not); + } + static const TypeKind Kind = TypeKind::FloatType; + // global singleton + static FloatTypePtr get(); + + private: + FloatType() : NumberType(TypeKind::FloatType) {} + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "float"; + } +}; + +struct ComplexType; +using ComplexTypePtr = SingletonTypePtr; +// This type represents a Python float number +struct TORCH_API ComplexType : public NumberType { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "complex"; + } + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + // NOLINTNEXTLINE(bugprone-parent-virtual-call) + return rhs.kind() == TypeKind::NumberType || Type::isSubtypeOfExt(rhs, why_not); + } + static const TypeKind Kind = TypeKind::ComplexType; + // global singleton + static ComplexTypePtr get(); + + private: + ComplexType() : NumberType(TypeKind::ComplexType) {} + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "complex"; + } +}; + +// We need to introduce `SymIntType` to represent the `SymInt` type +// used in function schemas e.g. `aten::narrow_copy(... SymInt length) +// `SymInt` will be used to enable tracing arithmetic operations on +// dimension values. Please see [SymInt.h] for more information +struct SymIntType; +using SymIntTypePtr = SingletonTypePtr; +struct TORCH_API SymIntType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "SymInt"; + } + std::string annotation_str_impl(const TypePrinter& printer [[maybe_unused]] = nullptr) const override { + return "int"; + } + static const TypeKind Kind = TypeKind::SymIntType; + // global singleton + static SymIntTypePtr get(); + + private: + SymIntType() : Type(TypeKind::SymIntType) {} +}; + +struct SymFloatType; +using SymFloatTypePtr = SingletonTypePtr; +struct TORCH_API SymFloatType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "SymFloat"; + } + std::string annotation_str_impl(const TypePrinter& printer [[maybe_unused]] = nullptr) const override { + return "float"; + } + static const TypeKind Kind = TypeKind::SymFloatType; + // global singleton + static SymFloatTypePtr get(); + + private: + SymFloatType() : Type(TypeKind::SymFloatType) {} +}; + +struct SymBoolType; +using SymBoolTypePtr = SingletonTypePtr; +struct TORCH_API SymBoolType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "SymBool"; + } + std::string annotation_str_impl(const TypePrinter& printer [[maybe_unused]] = nullptr) const override { + return "bool"; + } + static const TypeKind Kind = TypeKind::SymBoolType; + // global singleton + static SymBoolTypePtr get(); + + private: + SymBoolType() : Type(TypeKind::SymBoolType) {} +}; + +struct IntType; +using IntTypePtr = SingletonTypePtr; +// This type represents a Python int number +struct TORCH_API IntType : public NumberType { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "int"; + } + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + // NOLINTNEXTLINE(bugprone-parent-virtual-call) + return rhs.kind() == TypeKind::NumberType || Type::isSubtypeOfExt(rhs, why_not); + } + static const TypeKind Kind = TypeKind::IntType; + // global singleton + static IntTypePtr get(); + + private: + IntType() : NumberType(TypeKind::IntType) {} + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "int"; + } +}; + +struct BoolType; +using BoolTypePtr = SingletonTypePtr; +// This node represents a Python bool value +struct TORCH_API BoolType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "bool"; + } + static const TypeKind Kind = TypeKind::BoolType; + // global singleton + static BoolTypePtr get(); + + private: + BoolType() : Type(TypeKind::BoolType) {} +}; + +struct StringType; +using StringTypePtr = SingletonTypePtr; +// This type represents a Python string +struct TORCH_API StringType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + // we only use "str" (not "string") in both FunctionSchema and script + return annotation_str(); + } + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "str"; + } + static const TypeKind Kind = TypeKind::StringType; + // global singleton + static StringTypePtr get(); + + private: + StringType() : Type(TypeKind::StringType) {} +}; + +struct StorageType; +using StorageTypePtr = SingletonTypePtr; +struct TORCH_API StorageType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return annotation_str(); + } + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "Storage"; + } + static const TypeKind Kind = TypeKind::StorageType; + // global singleton + static StorageTypePtr get(); + + private: + StorageType() : Type(TypeKind::StorageType) {} +}; + +struct FunctionType; +using FunctionTypePtr = std::shared_ptr; +struct TORCH_API FunctionType : public NamedType { + static FunctionTypePtr create(torch::jit::Function* function) { + return FunctionTypePtr( + new FunctionType(function)); // NOLINT(modernize-make-shared) + } + bool equals(const Type& rhs) const override { + if (auto func_type = rhs.cast()) { + return func_type->function_ == function_; + } + + return false; + } + std::string str() const override { + return "Function"; + } + torch::jit::Function* function() const { + return function_; + } + static const TypeKind Kind = TypeKind::FunctionType; + + private: + FunctionType(torch::jit::Function* function); + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name()->qualifiedName(); + } + torch::jit::Function* function_; +}; + +struct NoneType; +using NoneTypePtr = SingletonTypePtr; +// This type represents a Python None +struct TORCH_API NoneType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "NoneType"; + } + bool isSubtypeOfExt(const Type& rhs, std::ostream *why_not) const override; + + static const TypeKind Kind = TypeKind::NoneType; + // global singleton + static NoneTypePtr get(); + + private: + NoneType() : Type(TypeKind::NoneType) {} +}; + +struct GeneratorType; +using GeneratorTypePtr = SingletonTypePtr; +// This type represents a Generator +struct TORCH_API GeneratorType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Generator"; + } + static const TypeKind Kind = TypeKind::GeneratorType; + // global singleton + static GeneratorTypePtr get(); + + private: + GeneratorType() : Type(TypeKind::GeneratorType) {} +}; + +struct QuantizerType; +using QuantizerTypePtr = SingletonTypePtr; +// This type represents a Quantizer +struct TORCH_API QuantizerType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Quantizer"; + } + static const TypeKind Kind = TypeKind::QuantizerType; + // global singleton + static QuantizerTypePtr get(); + + private: + QuantizerType() : Type(TypeKind::QuantizerType) {} +}; + +struct QSchemeType; +using QSchemeTypePtr = SingletonTypePtr; +// This type represents a QScheme +struct TORCH_API QSchemeType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "QScheme"; + } + static const TypeKind Kind = TypeKind::QSchemeType; + // global singleton + static QSchemeTypePtr get(); + + private: + QSchemeType() : Type(TypeKind::QSchemeType) {} +}; + +struct DeviceObjType; +using DeviceObjTypePtr = SingletonTypePtr; +// This type represents a Device +struct TORCH_API DeviceObjType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Device"; + } + static const TypeKind Kind = TypeKind::DeviceObjType; + // global singleton + static DeviceObjTypePtr get(); + + private: + DeviceObjType() : Type(TypeKind::DeviceObjType) {} +}; + +struct StreamObjType; +using StreamObjTypePtr = SingletonTypePtr; +// This type represents a Generator +struct TORCH_API StreamObjType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Stream"; + } + static const TypeKind Kind = TypeKind::StreamObjType; + // global singleton + static StreamObjTypePtr get(); + +private: + StreamObjType() : Type(TypeKind::StreamObjType) {} +}; + +struct VarType; +using VarTypePtr = std::shared_ptr; +// This type represents a type variable, used in FunctionSchema +struct VarType : public SharedType { + static VarTypePtr create(std::string name_) { + return VarTypePtr(new VarType(std::move(name_))); + } + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return name(); + } + const std::string& name() const { + return name_; + } + bool hasFreeVariables() const override { + return true; + } + static const TypeKind Kind = TypeKind::VarType; + + private: + VarType(std::string name_) + : SharedType(TypeKind::VarType), name_(std::move(name_)) {} + std::string name_; +}; + +struct CapsuleType; +using CapsuleTypePtr = SingletonTypePtr; +// This type represents a Python Capsule. +// It does not appear in the IR and is only used during runtime +struct TORCH_API CapsuleType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Capsule"; + } + static const TypeKind Kind = TypeKind::CapsuleType; + // global singleton + static CapsuleTypePtr get(); +private: + CapsuleType() + : Type(TypeKind::CapsuleType) {} +}; + +struct PyObjectType; +using PyObjectTypePtr = SingletonTypePtr; +// This type represents a PyObject Type +struct TORCH_API PyObjectType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "PyObject"; + } + static const TypeKind Kind = TypeKind::PyObjectType; + // global singleton + static PyObjectTypePtr get(); +private: + PyObjectType() + : Type(TypeKind::PyObjectType) {} +}; + +enum class TypeVerbosity { + None, + Type, + TypeAndStride, + Full, + Symbolic, + Default = Full, +}; + +TORCH_API TypeVerbosity type_verbosity(); + +TORCH_API std::ostream& operator<<(std::ostream& out, const Type& t); +template +TORCH_API std::ostream& operator<<( + std::ostream& out, + const VaryingShape& t); +TORCH_API std::ostream& operator<<(std::ostream& os, const SymbolicShape& s); +TORCH_API std::ostream& operator<<(std::ostream& os, const ShapeSymbol& s); +TORCH_API std::ostream& operator<<(std::ostream& os, const Stride& s); +// what is the type, ignoring extra size/shape information? +// e.g. Tensor(2x3) -> Dynamic, and Tuple(Tensor(2x3),...) -> Tuple(Dynamic,...) + +// `unshapedType` is used to remove Tensor subtypes. We treat all Tensor +// subtypes as simply "Tensor"; we also create a new version of any +// container types in which internal Tensors have undergone the same +// operation. This is used for type comparisons between two Tensor types +// (`unshapedType` means that we don't falsely return `false` for e.g. +// Tensors of different dimensions). It's also used in the alias +// analysis pass. +// Be careful with calls because this can be very slow. If calling this +// on a graph, use `EraseShapeInformation` in shape_analysis.h +inline TypePtr unshapedType(const TypePtr& type) { + if (type->isSubtypeOf(*TensorType::get())) { + return TensorType::get(); + } + at::ArrayRef contained = type->containedTypes(); + if (contained.empty()) { + return type; + } + return type->withContained(fmap(type->containedTypes(), unshapedType)); +} + +inline TypePtr TensorType::fromNumberType(const Type& typ) { + if (typ.isSubtypeOf(*IntType::get())) { + return TensorType::createContiguous(at::kLong, at::kCPU, {}); + } else if (typ.isSubtypeOf(*FloatType::get())) { + return TensorType::createContiguous(at::kDouble, at::kCPU, {}); + } else if (typ.isSubtypeOf(*BoolType::get())) { + return TensorType::createContiguous(at::kBool, at::kCPU, {}); + } else if (typ.kind() == NumberType::Kind) { + return TensorType::create(std::nullopt, at::kCPU, {}, std::nullopt); + } + TORCH_CHECK(false, "Unknown number type: ", typ.str()); +} +inline TypePtr TensorType::fromBoolType() { + return TensorType::createContiguous(at::kBool, at::kCPU, {}); +} + +inline std::optional tryScalarTypeFromJitType(const Type& type) { + if (type == *FloatType::get()) { + return at::typeMetaToScalarType(c10::get_default_dtype()); + } else if (type == *IntType::get()) { + return at::ScalarType::Long; + } else if (type == *BoolType::get()) { + return at::ScalarType::Bool; + } + return std::nullopt; +} + +inline at::ScalarType scalarTypeFromJitType(const Type& type) { + auto result = tryScalarTypeFromJitType(type); + TORCH_CHECK( + result, + "Add new condition, expected Float, Complex, Int, or Bool but got", + type.str()); + return *result; +} + +// Attempt to find the correct supertype of the two types `t1` and `t2`. +// If no supertype is found, then nullopt will be returned if +// `default_to_union` is false, and `Union[t1, t2]` will be returned +// if it is true. If `t1 == t2`, or `t1` is a type refinement of `t2`, +// then `t2` will be returned (and vice versa). +// +// Two different tensortypes will return dynamic. +// +// Currently we chose not to support returning a NumberType for +// two types from the set of {FloatType, IntType, ComplexType}, because +// there is a lack of operator support for NumberType. +// +// If `type_hint` is an `InterfaceType`, then we can use that as a +// potential supertype for `ClassType`s in the list. Otherwise, we have +// no way to find and use some common interface type +TORCH_API std::optional unifyTypes( + const TypePtr& t1, + const TypePtr& t2, + bool default_to_union = false, + const TypePtr& type_hint = nullptr); + +TORCH_API std::optional unifyTypeList( + at::ArrayRef elements, + std::ostream& why_not, + bool default_to_union = false, + const TypePtr& type_hint = nullptr); + +namespace detail { +template +struct getTypePtr_ final { + static decltype(auto) call() { + return ([]() { + try { + return getCustomClassType(); + } catch(const c10::Error&) { + TORCH_CHECK( + false, + "Type ", + c10::util::get_fully_qualified_type_name(), + " could not be converted to any of the known types." + ); + } + }()); + } +}; + +template +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return getTypePtr_::call(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return AnyType::get(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return TensorType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StorageType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StreamObjType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return FloatType::get(); + } +}; +template <> +struct getTypePtr_> final { + static decltype(auto) call() { + return ComplexType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; + +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return SymIntType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; + +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return SymFloatType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return FloatType::get(); + } +}; + +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return SymBoolType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return BoolType::get(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return DeviceObjType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return BoolType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return NumberType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return QSchemeType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return TypeFactory::create( + TypeFactory::get()); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StringType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StringType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StringType::get(); + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per vector" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = ListType::get("vector", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per ArrayRef" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = ListType::get("ArrayRef", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_ final { + static const auto& call() { + static auto type = ListType::create(getMaybeFakeTypePtr_::call()); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per List" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = ListType::get("List", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + static auto type = ListType::get("List", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per array" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + // (Concatenating the length onto the end of the string because we want a unique + // type_ptr created for every std::array type). + static auto type = ListType::get(std::string("array") + std::to_string(N), inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_key_type = getMaybeFakeTypePtr_::call(); + static auto inner_val_type = getMaybeFakeTypePtr_::call(); + // The "per unordered_map" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = DictType::get("unordered_map", inner_key_type, inner_val_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_key_type = getMaybeFakeTypePtr_::call(); + static auto inner_val_type = getMaybeFakeTypePtr_::call(); + // The "per Dict" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = DictType::get("Dict", inner_key_type, inner_val_type); + return type; + } +}; + +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per std::optional" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = OptionalType::get(inner_type); + return type; + } +}; + + +template<> +struct getTypePtr_ final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per std::optional" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = OptionalType::get(inner_type); + return type; + } +}; + +template +struct getMaybeFakeTypePtr_ final { + static const auto& call() { + // The "per std::optional" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto inner_type = getMaybeFakeTypePtr_::call(); + static auto type = OptionalType::get(inner_type); + return type; + } +}; + +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto type = ([]() { + std::vector contained_types = { + (getMaybeFakeTypePtr_::call())... + }; + return TupleType::create(std::move(contained_types)); + })(); + return type; + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return NoneType::get(); + } +}; +} // namespace detail +template +inline decltype(auto) getTypePtr() { + // TODO: static_assert that a templated function exists, and throw a friendly + // error message if not + return detail::getMaybeFakeTypePtr_::call(); +} + +template +inline TypePtr getTypePtrCopy() { + // TODO: static_assert that a templated function exists, and throw a friendly + // error message if not + return getTypePtr(); +} + +template +inline decltype(auto) getFakeTypePtr() { + return detail::getMaybeFakeTypePtr_::call(); +} + +template +inline TypePtr getFakeTypePtrCopy() { + return getFakeTypePtr(); +} + +using TypeEnv = std::unordered_map; +struct MatchTypeReturn { + MatchTypeReturn(std::string reason) : reason_(std::move(reason)) {} + static MatchTypeReturn Success() { + return MatchTypeReturn(); + } + bool success() const { + return !reason_.has_value(); + } + const std::string& reason() const { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return reason_.value(); + } + + private: + MatchTypeReturn() + : reason_(std::nullopt) {} + std::optional reason_; // is there is no match, this contains the reason +}; + +// attempt to match the type variables in formal to actual, adding them to type_env. +// If no match is possible this returns a MatchTypeReturn with r.success() == false +// and a r.reason() that describes why it could not match. +// note: It is possible to successfully match a formal, but for type variables +// in the formal to still not be defined. In particular, None matches Optional[T] +// but does not define the value of T. +TORCH_API MatchTypeReturn +matchTypeVariables(const TypePtr& formal, const TypePtr& actual, TypeEnv& type_env); + +// replace type variables appearing in `type` with the values in +// `type_env`. Returns nullptr if a variable used in `type` +// does not appear in `type_env` +TORCH_API TypePtr tryEvalTypeVariables(const TypePtr& type, TypeEnv& type_env); + +TORCH_API bool elementTypeCanBeInferredFromMembers(const TypePtr& elem_type); + +struct InterfaceType; +using InterfaceTypePtr = std::shared_ptr; + +// Interfaces are a list of abstract methods that a class might meet. +// If a class provides those methods, it implicitly meets the interface. + +// Subtype relations for Interface with ClassType: +// lhs (ClassType or InterfaceType) is a subtype of rhs if: +// 1. lhs methods are a superset of rhs methods +// 2. if rhs is module interface, the lhs must be module interface or module itself +struct TORCH_API InterfaceType : public NamedType { + static InterfaceTypePtr create( + QualifiedName qualifiedName, bool is_module=false); + + bool equals(const Type& rhs) const override { + if (auto user_rhs = rhs.castRaw()) { + return isSubTypeImpl(*this, *user_rhs, nullptr) && + isSubTypeImpl(*user_rhs, *this, nullptr); + } + return false; + } + + std::string str() const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return std::string("InterfaceType<") + name()->name() + ">"; + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + // try to find a method of this interface, + // returns nullptr if not found. + const FunctionSchema* getMethod(const std::string& name) const; + void addMethod(FunctionSchema schema); + const std::vector& methods() const { + return *methods_; + } + + bool is_module() const override{ + return is_module_; + } + static const TypeKind Kind = TypeKind::InterfaceType; + ~InterfaceType() override = default; + private: + InterfaceType(QualifiedName name, bool is_module); + static bool isSubTypeImpl( + const InterfaceType& lhs, + const InterfaceType& rhs, + std::ostream* why_not); + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name()->qualifiedName(); + } + + // shared_ptr so that this header does not have to depend on + // FunctionSchema.h + std::shared_ptr> methods_; + // flag to distinguish if it's an interface type from a module or not + bool is_module_; +}; + +template +struct EnumerationType : public Type { +static const TypeKind Kind = K; + +bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); +} + +protected: +EnumerationType() : Type(Kind) {} +}; + +// WARNING: These enumeration types below DO NOT actually get parsed out +// from the logical schema strings, instead they are mapped as ints. To +// observe these types, use real_type() instead of type() on Argument + +struct ScalarTypeType; +using ScalarTypeTypePtr = SingletonTypePtr; +struct TORCH_API ScalarTypeType : public EnumerationType { +std::string str() const override { +return "ScalarType"; +} +static const TypeKind Kind = TypeKind::ScalarTypeType; +// global singleton +static ScalarTypeTypePtr get(); + +private: +ScalarTypeType() {} +}; + +struct MemoryFormatType; +using MemoryFormatTypePtr = SingletonTypePtr; +struct TORCH_API MemoryFormatType : public EnumerationType { +std::string str() const override { +return "MemoryFormat"; +} +static const TypeKind Kind = TypeKind::MemoryFormatType; +// global singleton +static MemoryFormatTypePtr get(); + +private: +MemoryFormatType() {} +}; + +struct LayoutType; +using LayoutTypePtr = SingletonTypePtr; +struct TORCH_API LayoutType : public EnumerationType { +std::string str() const override { +return "Layout"; +} +static const TypeKind Kind = TypeKind::LayoutType; +// global singleton +static LayoutTypePtr get(); + +private: +LayoutType() {} +}; + +namespace detail { +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return ScalarTypeType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return LayoutType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return MemoryFormatType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; +} // namespace detail + +// the common supertype of all lists, +// List[T] <: AnyList for all T +struct AnyListType; +using AnyListTypePtr = SingletonTypePtr; +struct TORCH_API AnyListType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "list"; + } + static const TypeKind Kind = TypeKind::AnyListType; + // global singleton + static AnyListTypePtr get(); +private: + AnyListType() + : Type(TypeKind::AnyListType) {} +}; + +// the common supertype of all tuples, +// Tuple[T...] <: AnyTuple for all T +struct AnyTupleType; +using AnyTupleTypePtr = SingletonTypePtr; +struct TORCH_API AnyTupleType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + + std::string str() const override { + return "tuple"; + } + static const TypeKind Kind = TypeKind::AnyTupleType; + + // global singleton + static AnyTupleTypePtr get(); +private: + AnyTupleType() + : Type(TypeKind::AnyTupleType) {} +}; + +// the common supertype of all classes, +// ClassType <: AnyClassType for all classes +struct AnyClassType; +using AnyClassTypePtr = SingletonTypePtr; +struct TORCH_API AnyClassType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "AnyClassType"; + } + static const TypeKind Kind = TypeKind::AnyClassType; + // global singleton + static AnyClassTypePtr get(); +private: + AnyClassType() + : Type(TypeKind::AnyClassType) {} +}; + +template<> +inline detail::CastReturnType::type Type::cast() { + if (kind() == TypeKind::TupleType || kind() == TypeKind::FunctionType || + kind() == TypeKind::ClassType || kind() == TypeKind::InterfaceType) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return nullptr; +} + +template<> +inline detail::CastConstReturnType::type Type::cast() const { + if (kind() == TypeKind::TupleType || kind() == TypeKind::FunctionType || + kind() == TypeKind::ClassType || kind() == TypeKind::InterfaceType) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return nullptr; +} + +template<> +inline const NamedType* Type::castRaw() const { + if (kind() == TypeKind::TupleType || kind() == TypeKind::FunctionType || + kind() == TypeKind::ClassType || kind() == TypeKind::InterfaceType) { + return static_cast(this); + } + return nullptr; +} + +// Used as a return type when inferring the IValue type of a Python object. +struct InferredType { + /* implicit */ InferredType(TypePtr type) : type_(std::move(type)) {} + /* implicit */ InferredType(std::string reason) + : type_(nullptr), reason_(std::move(reason)) {} + TypePtr type() const { + TORCH_INTERNAL_ASSERT( + type_, + "Tried to get the type from an InferredType but the type is null. ", + "Reason: ", + reason_); + return type_; + } + bool success() const { + return type_ != nullptr; + } + const std::string& reason() const { + TORCH_INTERNAL_ASSERT(!type_); + return reason_; + } + +private: + TypePtr type_; + std::string reason_; +}; + +TORCH_API bool containsAnyType(const TypePtr& type); + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/jit_type_base.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/jit_type_base.h new file mode 100644 index 0000000000000000000000000000000000000000..c7ffbb33786aa5d1118243b047836a189817ac67 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/jit_type_base.h @@ -0,0 +1,602 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +#define C10_FORALL_TYPES(_) \ + _(AnyType) \ + _(EnumType) \ + _(AnyEnumType) \ + _(TensorType) \ + _(StorageType) \ + _(TupleType) \ + _(ListType) \ + _(DictType) \ + _(NumberType) \ + _(FloatType) \ + _(ComplexType) \ + _(FutureType) \ + _(AwaitType) \ + _(RRefType) \ + _(IntType) \ + _(NoneType) \ + _(StringType) \ + _(GeneratorType) \ + _(QuantizerType) \ + _(BoolType) \ + _(OptionalType) \ + _(VarType) \ + _(DeviceObjType) \ + _(StreamObjType) \ + _(FunctionType) \ + _(ClassType) \ + _(PyObjectType) \ + _(CapsuleType) \ + _(InterfaceType) \ + _(QSchemeType) \ + _(ScalarTypeType) \ + _(LayoutType) \ + _(MemoryFormatType) \ + _(AnyListType) \ + _(AnyTupleType) \ + _(AnyClassType) \ + _(SymIntType) \ + _(SymFloatType) \ + _(SymBoolType) \ + _(UnionType) \ + _(DynamicType) + +enum class TypeKind { +#define DEFINE_TYPE(T) T, + C10_FORALL_TYPES(DEFINE_TYPE) +#undef DEFINE_TYPE +}; + +TORCH_API const char* typeKindToString(TypeKind kind); + +struct Type; +struct SharedType; + +// Use this to customize how a Type is printed using `annotation_str()`. If +// std::nullopt is returned, `annotation_str()` falls through to its default +// implementation. +using TypePrinter = std::function(const Type&)>; + +namespace detail { +template +struct IsSingletonType : public std::integral_constant {}; +} // namespace detail +#define TORCH_DECLARE_SINGLETON(Type) \ + struct Type; \ + namespace detail { \ + template <> struct IsSingletonType : public std::integral_constant {}; \ + } + +TORCH_DECLARE_SINGLETON(AnyType) +TORCH_DECLARE_SINGLETON(AnyEnumType) +TORCH_DECLARE_SINGLETON(NumberType) +TORCH_DECLARE_SINGLETON(FloatType) +TORCH_DECLARE_SINGLETON(ComplexType) +TORCH_DECLARE_SINGLETON(IntType) +TORCH_DECLARE_SINGLETON(BoolType) +TORCH_DECLARE_SINGLETON(StringType) +TORCH_DECLARE_SINGLETON(StorageType) +TORCH_DECLARE_SINGLETON(NoneType) +TORCH_DECLARE_SINGLETON(GeneratorType) +TORCH_DECLARE_SINGLETON(QuantizerType) +TORCH_DECLARE_SINGLETON(QSchemeType) +TORCH_DECLARE_SINGLETON(DeviceObjType) +TORCH_DECLARE_SINGLETON(StreamObjType) +TORCH_DECLARE_SINGLETON(CapsuleType) +TORCH_DECLARE_SINGLETON(PyObjectType) +TORCH_DECLARE_SINGLETON(ScalarTypeType) +TORCH_DECLARE_SINGLETON(LayoutType) +TORCH_DECLARE_SINGLETON(MemoryFormatType) +TORCH_DECLARE_SINGLETON(AnyListType) +TORCH_DECLARE_SINGLETON(AnyTupleType) +TORCH_DECLARE_SINGLETON(AnyClassType) + +namespace detail { +template +struct CastReturnType { + using type = std::shared_ptr; +}; + +template +struct CastReturnType::value>> { + using type = SingletonTypePtr; +}; + +template +struct CastConstReturnType { + using type = std::shared_ptr; +}; + +template +struct CastConstReturnType::value>> { + using type = SingletonTypePtr; +}; + +template +struct as_shared_type { + using type = SharedType*; +}; + +template +struct as_shared_type { + using type = const SharedType *; +}; +} // namespace detail + +struct TORCH_API Type { + friend TORCH_API bool operator==(const Type& lhs, const Type& rhs); + private: + TypeKind kind_; + + protected: + Type(TypeKind kind) : kind_(kind) {} + + Type(const Type&) = default; + Type& operator=(const Type&) = default; + Type(Type&&) noexcept = default; + Type& operator=(Type&&) noexcept = default; + + virtual std::string annotation_str_impl(const TypePrinter& /*printer*/) const { + return str(); + } + // a == b + virtual bool equals(const Type& rhs) const = 0; + // a == b <=> b == a + virtual bool symmetric() const { + return true; + } + + public: + template + class SingletonOrSharedTypePtr { + public: + using element_type = typename std::shared_ptr::element_type; + + SingletonOrSharedTypePtr() = default; + + /* implicit */ SingletonOrSharedTypePtr(std::shared_ptr x) + : repr_(std::move(x)) {} + + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(std::shared_ptr x) + : repr_(std::move(x)) {} + + /* implicit */ SingletonOrSharedTypePtr(std::nullptr_t) + : repr_(nullptr) {} + + /* implicit */ SingletonOrSharedTypePtr(SingletonTypePtr p) + : repr_(makeSingletonSharedPtr(p.get())) {} + + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(SingletonTypePtr p) + : repr_(makeSingletonSharedPtr(static_cast(p.get()))) {} + + + // We need to support construction from T* for pybind. The problem + // is that it's not clear if we are supposed to be taking shared + // ownership or not. + // + // Case 1: if T is known statically to derive from SharedType, we should use + // shared_from_this() and take shared_ownership. + // + // Case 2: if T is exactly Type, we need to do a dynamic_cast to + // check if it's a SharedType and do the right thing. + // + // Case 3: Otherwise, T is not a SharedType. Use a singleton + // pointer. + + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(T* p) : SingletonOrSharedTypePtr(static_cast::type>(p)->shared_from_this()) {} + + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(T* p) { + if (auto* shared_p = dynamic_cast::type>(p)) { + repr_ = shared_p->shared_from_this(); + } else { + repr_ = makeSingletonSharedPtr(p); + } + } + + template && !std::is_base_of_v, bool> = true> + /* implicit */ SingletonOrSharedTypePtr(T* p) + : repr_(makeSingletonSharedPtr(p)) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dynamic_cast::type>(p) == nullptr); + } + + SingletonOrSharedTypePtr(const SingletonOrSharedTypePtr&) = default; + SingletonOrSharedTypePtr(SingletonOrSharedTypePtr&&) noexcept = default; + SingletonOrSharedTypePtr& operator=(const SingletonOrSharedTypePtr&) = default; + SingletonOrSharedTypePtr& operator=(SingletonOrSharedTypePtr&&) noexcept = default; + ~SingletonOrSharedTypePtr() = default; + + T* get() const { + return repr_.get(); + } + + operator bool() const { + return repr_ != nullptr; + } + + bool operator==(std::nullptr_t) const { + return repr_ == nullptr; + } + + bool operator!=(std::nullptr_t) const { + return repr_ != nullptr; + } + + template , void>, bool> = true> + U& operator*() const { + return *get(); + } + + T* operator->() const { + return get(); + } + + private: + // Use shared_ptr's aliasing constructor to create a non-owning pointer + // to a singleton. The lifetime is tied to the null shared_ptr, so there's + // no reference counting overhead for the singleton itself. + static std::shared_ptr makeSingletonSharedPtr(T* ptr) { + return std::shared_ptr(std::shared_ptr(), ptr); + } + + std::shared_ptr repr_; + }; + + using TypePtr = SingletonOrSharedTypePtr; + using Ptr = TypePtr; + using ElementType = Type; + + // subtyping relation. By default, we return true for the case + // when the type is exactly equal or if this <: T where rhs = Optional[T] + + // if this returns false and the why_not stream is non-null, it contains + // additional details that describe why this is not a subtype of 'rhs'. + // This additional information should only contain details that are not + // obvious from the annotation_str() that describes the type. For instance it + // is clear that `int <: str` is false but not clear why `Foo <: InterfaceBar` + // might be false. + virtual bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const; + virtual bool is_module() const; + bool isSubtypeOf(const Type& rhs) const { + return isSubtypeOfExt(rhs, nullptr); + } + // Compatibility shims to accommodate existing code that passes shared_ptrs + // around. Ideally, we would just delete this, but it should be harmless. + template + std::enable_if_t, bool> + isSubtypeOf(const std::shared_ptr& rhs) const { + return isSubtypeOf(*rhs); + } + + template + std::enable_if_t, bool> + isSubtypeOf(const SingletonOrSharedTypePtr& rhs) const { + return isSubtypeOf(*rhs); + } + + template + std::enable_if_t, bool> + isSubtypeOf(SingletonTypePtr rhs) const { + return isSubtypeOf(*rhs); + } + + template + std::enable_if_t, bool> + isSubtypeOfExt(const SingletonOrSharedTypePtr& rhs, std::ostream* why_not) const { + return isSubtypeOfExt(*rhs, why_not); + } + + template + std::enable_if_t, bool> + isSubtypeOfExt(const std::shared_ptr& rhs, std::ostream* why_not) const { + return isSubtypeOfExt(*rhs, why_not); + } + + template + std::enable_if_t, bool> + isSubtypeOfExt(SingletonTypePtr rhs, std::ostream* why_not) const { + return isSubtypeOfExt(*rhs, why_not); + } + + // How this type will appear in FunctionSchema declarations + virtual std::string str() const = 0; + + // How this type will appear as if it were a type annotation in Python + // which is sometimes different than how it appears in declarations (e.g. + // int[] vs List[int]) + // + // Takes a custom printer that users can pass in to customize the output of + // this method. + std::string annotation_str(const TypePrinter& printer) const { + if (printer) { + // the printer can return std::nullopt to fall through to the default impl + if (auto renamed = printer(*this)) { + return *renamed; + } + } + return annotation_str_impl(printer); + } + std::string annotation_str() const { + // Overload instead of define a default value for `printer` to help + // debuggers out. + return annotation_str(nullptr); + } + + // Returns a human readable string that includes additional information like + // "type is inferred rather than explicitly defined" to help construct more + // user-friendly messages. + virtual std::string repr_str() const { + return annotation_str(); + } + + TypeKind kind() const { + return kind_; + } + + virtual bool isUnionType() const { + return false; + } + + virtual bool requires_grad() const { + for (const auto& ct : containedTypes()) { + if (ct->requires_grad()) { + return true; + } + } + return false; + } + + // Dynamically cast this object to the subclass indicated by the + // template variable, returning nullptr if the cast is invalid. + template ::value, bool> = true> + typename detail::CastReturnType::type cast() { + if (T::Kind == kind()) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return nullptr; + } + template ::value, bool> = true> + typename detail::CastReturnType::type cast() { + if (T::Kind == kind()) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this == T::get().get()); + return typename detail::CastReturnType::type(static_cast(this)); + } + return nullptr; + } + template ::value, bool> = true> + typename detail::CastConstReturnType::type cast() const { + if (T::Kind == kind()) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return nullptr; + } + template ::value, bool> = true> + typename detail::CastConstReturnType::type cast() const { + if (T::Kind == kind()) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this == T::get().get()); + return typename detail::CastConstReturnType::type(static_cast(this)); + } + return nullptr; + } + template + T* castRaw() { + if (T::Kind == kind()) { + return static_cast(this); + } + return nullptr; + } + template + const T* castRaw() const { + if (T::Kind == kind()) { + return static_cast(this); + } + return nullptr; + } + template + auto expect() { + auto r = cast(); + AT_ASSERT(r); + return r; + } + template + auto expect() const { + auto r = cast(); + AT_ASSERT(r); + return r; + } + template + T& expectRef() { + auto* r = castRaw(); + AT_ASSERT(r); + return *r; + } + template + const T& expectRef() const { + auto* r = castRaw(); + AT_ASSERT(r); + return *r; + } + virtual ~Type() = default; + virtual bool hasFreeVariables() const { + return false; + } + // list of types this type contains, e.g. for a List then element type of a + // list for a tuple, the types of the tuple elements + virtual at::ArrayRef containedTypes() const { + return {}; + } + virtual TypePtr containedType(size_t i) const { + return containedTypes().at(i); + } + virtual size_t containedTypeSize() const { + return containedTypes().size(); + } + // create a new version of this type, replacing its contained types with + // contained_types + TypePtr withContained(std::vector contained_types); + // per-type constructor, you only need to override this if the + // containedTypes() is not empty + virtual TypePtr createWithContained( + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::vector /*contained_types*/) const { + TORCH_CHECK(false, + "type with contained types did not overload createWithContained: ", + str()); + } + +}; + +template +using SingletonOrSharedTypePtr = Type::SingletonOrSharedTypePtr; + + +template +bool operator==(const SingletonOrSharedTypePtr& x, const SingletonOrSharedTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const SingletonOrSharedTypePtr& x, const std::shared_ptr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const std::shared_ptr& x, const SingletonOrSharedTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const SingletonOrSharedTypePtr& x, const SingletonTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const SingletonTypePtr& x, const SingletonOrSharedTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator!=(const SingletonOrSharedTypePtr& x, const SingletonOrSharedTypePtr& y) { + return !(x == y); +} + +template +bool operator!=(const SingletonOrSharedTypePtr& x, const std::shared_ptr& y) { + return !(x == y); +} + +template +bool operator!=(const std::shared_ptr& x, const SingletonOrSharedTypePtr& y) { + return !(x == y); +} + +template +bool operator!=(const SingletonOrSharedTypePtr& x, const SingletonTypePtr& y) { + return !(x == y); +} + +template +bool operator!=(const SingletonTypePtr& x, const SingletonOrSharedTypePtr& y) { + return !(x == y); +} + +using TypePtr = SingletonOrSharedTypePtr; +using ConstTypePtr = SingletonOrSharedTypePtr; + +// Explicitly enable MaybeOwned>, rather than allowing +// MaybeOwned to be used for any type right away. +template +struct MaybeOwnedTraits> + : public MaybeOwnedTraitsGenericImpl> {}; + +// Base class for Types that are guaranteed to be owned by std::shared_ptr. +struct TORCH_API SharedType : public Type, public std::enable_shared_from_this { + using Type::Type; +}; + +inline TypePtr Type::withContained(std::vector contained_types) { + auto current_contained = containedTypes(); + // Types with no contained_types don't need this call. Check before calling! + // + // (We can't support this efficiently because types without + // contained types may be singletons, in which case + // shared_from_this will crash; we would have to provide a virtual + // typeptr_from_this or isSingleton.) + TORCH_INTERNAL_ASSERT(!current_contained.empty() && current_contained.size() == contained_types.size()); + if (current_contained.equals(contained_types)) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return createWithContained(std::move(contained_types)); +} + + +inline bool operator==(const Type& lhs, const Type& rhs) { + if (C10_UNLIKELY(!rhs.symmetric())) { + return rhs.equals(lhs); + } + return lhs.equals(rhs); +} + +struct NamedType; +using NamedTypePtr = std::shared_ptr; +using ConstNamedTypePtr = std::shared_ptr; + +struct TORCH_API NamedType : public SharedType { + NamedType(TypeKind tk, std::optional name) + : SharedType(tk), name_(std::move(name)) { + TORCH_INTERNAL_ASSERT( + tk == TypeKind::TupleType || tk == TypeKind::FunctionType || + tk == TypeKind::ClassType || tk == TypeKind::InterfaceType || + tk == TypeKind::EnumType, + "If you add a new kind of NamedType, ", + "please update the cast specialization and this assert"); + } + + // Fully qualified name of type + // Looks like: "foo.bar.Baz". + const std::optional& name() const { + return name_; + } + + private: + std::optional name_; +}; + +} // namespace c10 + +namespace std { +template +struct hash> { + size_t operator()(const c10::SingletonOrSharedTypePtr& x) const { + return std::hash()(x.get()); + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/operator_name.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/operator_name.h new file mode 100644 index 0000000000000000000000000000000000000000..3dbd04bdea8b71e88fbebdced2eea6c10b295193 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/operator_name.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// TODO: consider storing namespace separately too +struct OperatorName final { + std::string name; + std::string overload_name; + OperatorName(std::string name, std::string overload_name) + : name(std::move(name)), overload_name(std::move(overload_name)) {} + + // TODO: These two functions below are slow! Fix internal data structures so + // I don't have to manually reconstruct the namespaces! + + // Return the namespace of this OperatorName, if it exists. The + // returned string_view is only live as long as the OperatorName + // exists and name is not mutated + std::optional getNamespace() const { + auto pos = name.find("::"); + if (pos == std::string::npos) { + return std::nullopt; + } else { + return std::string_view(name.data(), pos); + } + } + + // Returns true if we successfully set the namespace + bool setNamespaceIfNotSet(const char* ns) { + if (!getNamespace().has_value()) { + const auto ns_len = strlen(ns); + const auto old_name_size = name.size(); + name.resize(ns_len + 2 + old_name_size); + // Shift current value of name to the end of the new space. + name.replace( + name.size() - old_name_size, old_name_size, name, 0, old_name_size); + name.replace(0, ns_len, ns, ns_len); + name[ns_len] = ':'; + name[ns_len + 1] = ':'; + return true; + } else { + return false; + } + } +}; + +// Non-owning view of an OperatorName. Unlike OperatorName, most of +// its functions are constexpr, so it can be used for compile time +// computations +struct OperatorNameView final { + std::string_view name; + std::string_view overload_name; + constexpr OperatorNameView( + std::string_view name, + std::string_view overload_name) + : name(name), overload_name(overload_name) {} + // Parses strings like "foo.overload" and also "foo" + constexpr static OperatorNameView parse(std::string_view full_name) { + auto i = full_name.find('.'); + if (i == std::string_view::npos) { + return OperatorNameView(full_name, std::string_view()); + } else { + return OperatorNameView(full_name.substr(0, i), full_name.substr(i + 1)); + } + } +}; + +inline bool operator==(const OperatorName& lhs, const OperatorName& rhs) { + return lhs.name == rhs.name && lhs.overload_name == rhs.overload_name; +} + +inline bool operator!=(const OperatorName& lhs, const OperatorName& rhs) { + return !operator==(lhs, rhs); +} + +TORCH_API std::string toString(const OperatorName& opName); +TORCH_API std::ostream& operator<<(std::ostream& /*os*/, const OperatorName& /*opName*/); + +} // namespace c10 + +namespace std { +template <> +struct hash<::c10::OperatorName> { + size_t operator()(const ::c10::OperatorName& x) const { + return std::hash()(x.name) ^ + (~std::hash()(x.overload_name)); + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/qualified_name.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/qualified_name.h new file mode 100644 index 0000000000000000000000000000000000000000..60e05fd9033486cc08999b939836359024313d70 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/qualified_name.h @@ -0,0 +1,166 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +// Represents a name of the form "foo.bar.baz" +struct QualifiedName { + QualifiedName() = default; + + // `name` can be a dotted string, like "foo.bar.baz", or just a bare name. + /* implicit */ QualifiedName(const std::string& name) { + TORCH_CHECK(!name.empty()); + // split the string into its atoms. + size_t startSearchFrom = 0; + size_t pos = name.find(delimiter_, startSearchFrom); + + while (pos != std::string::npos) { + auto atom = name.substr(startSearchFrom, pos - startSearchFrom); + TORCH_INTERNAL_ASSERT( + !atom.empty(), "Invalid name for qualified name: '", name, "'"); + atoms_.push_back(std::move(atom)); + startSearchFrom = pos + 1; + pos = name.find(delimiter_, startSearchFrom); + } + + auto finalAtom = name.substr(startSearchFrom); + TORCH_INTERNAL_ASSERT( + !finalAtom.empty(), "Invalid name for qualified name: '", name, "'"); + atoms_.emplace_back(std::move(finalAtom)); + + cacheAccessors(); + } + + explicit QualifiedName(std::vector atoms) : atoms_(std::move(atoms)) { + for (const auto& atom : atoms_) { + TORCH_CHECK(!atom.empty(), "Atom cannot be empty"); + TORCH_CHECK( + atom.find(delimiter_) == std::string::npos, + "Delimiter not allowed in atom"); + } + + cacheAccessors(); + } + // Unnecessary copy. Ideally we'd use something like std::string_view. + /* implicit */ QualifiedName(const char* name) + : QualifiedName(std::string(name)) {} + + // `name` must be a bare name (no dots!) + explicit QualifiedName(const QualifiedName& prefix, std::string name) { + TORCH_INTERNAL_ASSERT(!name.empty()); + TORCH_INTERNAL_ASSERT(name.find(delimiter_) == std::string::npos); + atoms_.insert(atoms_.begin(), prefix.atoms_.begin(), prefix.atoms_.end()); + atoms_.push_back(std::move(name)); + + cacheAccessors(); + } + + // Is `this` a prefix of `other`? + // For example, "foo.bar" is a prefix of "foo.bar.baz" + bool isPrefixOf(const QualifiedName& other) const { + const auto& thisAtoms = atoms_; + const auto& otherAtoms = other.atoms_; + + if (thisAtoms.size() > otherAtoms.size()) { + // Can't be a prefix if it's bigger + return false; + } + for (const auto i : c10::irange(thisAtoms.size())) { + if (thisAtoms[i] != otherAtoms[i]) { + return false; + } + } + return true; + } + + // The fully qualified name, like "foo.bar.baz" + const std::string& qualifiedName() const { + return qualifiedName_; + } + + // The leading qualifier, like "foo.bar" + const std::string& prefix() const { + return prefix_; + } + + // The base name, like "baz" + const std::string& name() const { + return name_; + } + + const std::vector& atoms() const { + return atoms_; + } + + bool operator==(const QualifiedName& other) const { + return this->qualifiedName_ == other.qualifiedName_; + } + + bool operator!=(const QualifiedName& other) const { + return !(*this == other); + } + + private: + static constexpr char delimiter_ = '.'; + + // Helper for cacheAccessors() below. + template + std::string join(char delimiter, const T& v) { + std::string out; + size_t reserve = 0; + for (const auto& e : v) { + reserve += e.size() + 1; + } + out.reserve(reserve); + for (const auto i : c10::irange(v.size())) { + if (i != 0) { + out.push_back(delimiter); + } + out.append(v[i]); + } + return out; + } + + void cacheAccessors() { + qualifiedName_ = join(delimiter_, atoms_); + if (atoms_.size() > 1) { + ArrayRef view(atoms_); + const auto prefixView = view.slice(0, view.size() - 1); + prefix_ = join(delimiter_, prefixView); + } + + if (!atoms_.empty()) { + name_ = atoms_.back(); + } + } + + // The actual list of names, like "{foo, bar, baz}" + std::vector atoms_; + + /* + * Cached accessors, derived from `atoms_`. + */ + std::string qualifiedName_; + std::string prefix_; + std::string name_; +}; +} // namespace c10 + +namespace std { +template <> +struct hash { + size_t operator()(const c10::QualifiedName& n) const noexcept { + return std::hash()(n.qualifiedName()); + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/rref_interface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/rref_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..b40fe9ed5c2e1e1bae0f0eac22c6cdd4f318c042 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/rref_interface.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10 { + +struct Type; +using worker_id_t = int16_t; + +// This abstract class contains only user-facing APIs, and will be shared +// between jit and distributed to implement TorchScript support. +class C10_EXPORT RRefInterface : public c10::intrusive_ptr_target { + public: + RRefInterface() = default; + // RRef is made NOT copyable NOT movable to prevent messing up reference + // counting. + RRefInterface(const RRefInterface& other) = delete; + RRefInterface(RRefInterface&& other) = delete; + RRefInterface& operator=(const RRefInterface& other) = delete; + RRefInterface& operator=(RRefInterface&& other) = delete; + + ~RRefInterface() override = default; + + // returns the worker id of the owner + virtual worker_id_t owner() const = 0; + + // returns the worker name of the owner + virtual std::string ownerName() const = 0; + + // Returns true if this is the ``OwnerRRef`` + virtual bool isOwner() const = 0; + + // Returns true if this is an ``OwnerRRef`` or if this ``UserRRef`` has been + // confirmed by its owner. + virtual bool confirmedByOwner() const = 0; + + virtual const TypePtr type() const = 0; +}; + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/stack.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/stack.h new file mode 100644 index 0000000000000000000000000000000000000000..6da9ad54ddcb22efa9185a31d95e30c94d822371 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/stack.h @@ -0,0 +1,209 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +// TODO move this to c10 namespace + + +namespace torch::jit { + +using c10::IValue; +using Stack = std::vector; + +class Operation { + template + using accepts = std::is_constructible, F&&>; + + public: + template ::value, int> = 0> + C10_DEPRECATED_MESSAGE("Please use void(Stack&) to register operator instead.") + Operation(F&& raw): op_([raw = std::forward(raw)](Stack& stack) { + raw(&stack); + }) {} + + template ::value && + !std::is_same_v, Operation>, int> = 0> + Operation(F&& op): op_(std::forward(op)) {} + + Operation(std::nullptr_t) noexcept {} + + explicit operator bool() const noexcept { + return op_ ? true : false; + } + + void operator()(Stack& stack) { + op_(stack); + } + + template + T* target() noexcept { + return op_.target(); + } + + private: + std::function op_; +}; + +// An operation with N inputs and M outputs pops the last N inputs off +// the stack and pushes its M inputs onto the stack +// before: I0, I1, ... IN <- stack.back() +// after: O0, O1, ... OM +// operations are defined this way so that ownership of inputs can be +// transferred to the operation and it can incrementally drop ownership of +// tensors when they become unneeded. For large operations, like 'run an entire +// subgraph', this functionality is very important for minimizing gpu memory +// usage return value is the relative 'offset' to jump to for the next +// operation: +// pc += 1 + offset +// so a return value of 0 goes to the next instruction + +// treat the last N elements of the stack as a list, looking up +// element i +inline IValue& peek(Stack& stack, size_t i, size_t N) { + // NOLINTNEXTLINE(*-narrowing-conversions) + return *(stack.end() - N + i); +} +inline IValue& peek(Stack* stack, size_t i, size_t N) { + return peek(*stack, i, N); +} +inline const IValue& peek(const Stack& stack, size_t i, size_t N) { + // NOLINTNEXTLINE(*-narrowing-conversions) + return *(stack.end() - N + i); +} +inline const IValue& peek(const Stack* stack, size_t i, size_t N) { + return peek(*stack, i, N); +} +// treat the last N elements of the stack as a list, looking up the +// slice starting at index i and having length len +inline at::ArrayRef peekSlice( + const Stack& stack, + size_t i, + size_t len, + size_t N) { + return at::ArrayRef(stack).slice(stack.size() - N + i, len); +} +inline at::ArrayRef last(const Stack& stack, size_t N) { + return peekSlice(stack, 0, N, N); +} +inline at::ArrayRef last(const Stack* stack, size_t N) { + return last(*stack, N); +} +inline void drop(Stack& stack, size_t n) { + // NOLINTNEXTLINE(*-narrowing-conversions) + stack.erase(stack.end() - n, stack.end()); +} +inline void drop(Stack* stack, size_t n) { + drop(*stack, n); +} +inline IValue pop(Stack& stack) { + TORCH_CHECK(!stack.empty(), "pop() called on empty stack"); + auto r = std::move(stack.back()); + stack.pop_back(); + return r; +} +inline IValue pop(Stack* stack) { + return pop(*stack); +} +inline std::vector pop(Stack& stack, size_t n) { + std::vector result; + result.reserve(n); + for (const auto i : c10::irange(n)) { + result.push_back(std::move(peek(stack, i, n))); + } + drop(stack, n); + return result; +} + +// variadic pop: +// int64_t a; at::Tensor b; +// pop(stack, a, b); +// equivalent to: +// b = pop(stack).toTensor(); +// a = pop(stack).toInt(); +template +inline void pop(Stack& stack, Types&... args) { + size_t i = 0; + constexpr size_t N = sizeof...(args); + (void)std::initializer_list{ + (args = std::move(peek(stack, i++, N)).template to(), 0)...}; + drop(stack, N); +} +template +inline void pop(Stack* stack, Types&... args) { + pop(*stack, args...); +} +template +inline void push_one(Stack& stack, Type&& arg) { + stack.emplace_back(std::forward(arg)); +} + +inline void push_one(Stack& stack, c10::TensorOptions options) { + stack.emplace_back(c10::typeMetaToScalarType(options.dtype())); + stack.emplace_back(options.layout()); + stack.emplace_back(options.device()); + stack.emplace_back(options.pinned_memory()); +} + +template +inline void push(Stack& stack, Types&&... args) { + (void)std::initializer_list{(push_one(stack, std::forward(args)), 0)...}; +} +template +inline void push(Stack* stack, Types&&... args) { + return push(*stack, std::forward(args)...); +} +template +inline void push_list_elements(Stack& stack, const c10::List& elements) { + for (T elem : elements) { + stack.push_back(std::move(elem)); + } +} + +// The packer here is carefully written not to make any unnecessary +// copies. + +// pack takes the return values of aten functions pushes them onto the stack +template +inline void pack(Stack& stack, T&& v) { + stack.emplace_back(std::forward(v)); +} +template +inline void pack(Stack* stack, T&& v) { + pack(*stack, std::forward(v)); +} + +template +struct TuplePacker { + // NB: *Not* a universal reference. + static void execute(Stack& stack, std::tuple&& t) { + // NB: The move here does not "destroy" the entire tuple, that is + // not what std::move does; only the particular tuple index + // processed here gets stolen. + pack(stack, std::get(std::move(t))); + TuplePacker::execute(stack, std::move(t)); + } +}; + +template +struct TuplePacker<0, Args...> { + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + static void execute(Stack& /*stack*/, std::tuple&& /*t*/){} +}; + +template +inline void pack(Stack& stack, std::tuple&& t) { + TuplePacker::execute(stack, std::move(t)); +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/symbol.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/symbol.h new file mode 100644 index 0000000000000000000000000000000000000000..759d2ae7602ce3fba83bf74900e1085ac3fb0b51 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/symbol.h @@ -0,0 +1,152 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include // For std::hash +#include + + +namespace c10 { + +// 'prim' symbols are synthetic operators that occur only in the IR +// and don't have corresponding implementations in ATen. + +// 'onnx' symbols correspond to ONNX operators. Their semantics +// are defined in https://github.com/onnx/onnx/blob/master/docs/Operators.md +// The particular version we are targeting is specified by '_onnx_opset_version' +// in torch.onnx.symbolic_helper +// +// In general, most ONNX operators won't get an entry here, because they +// are handled from the Python end. However, you may occasionally need +// to intern an ONNX symbol here so that you can conveniently write an +// optimization on ONNX operations. + +// 'attr' symbols are attribute keys. They are shared between both ONNX and ATen +// operators (you disambiguate their meaning by looking at the operator itself). +// In general, you only need to define attribute keys that are used by +// onnx or prim; ATen attributes are automatically generated in FORALL_ATTR_BASE_SYMBOLS. + +// Note [Symbol allocation] +// ~~~~~~~~~~~~~~~~~~~~~~~~ +// +// 1. Symbol namespace is split up into namespaces. +// +// 2. The intended access pattern for built-in symbols is onnx::MatMul +// in the c10 namespace (this is a Symbol). +// + +// Built-in constant definition strategy: +// - Enum is the most convenient way to generate a contiguous sequence +// of numbers for an identifier. +// - However, an enum gives you a fresh type. We want onnx::MatMul to +// be type Symbol, not some random enum type! +// - Therefore, after using enums to generate the sequence of integers, +// we then declare constexpr Symbols to get everything the actual Symbol +// type we want. Symbols must be constexpr to be valid to be "case"ed on. + +using unique_t = uint32_t; + +const std::string& domain_prefix(); + +// A Symbol is like an interned string, but with a little extra +// structure; it is namespaced via SymbolNamespace and the resulting +// intern pointers support efficient namespace testing. +struct TORCH_API Symbol { + explicit constexpr Symbol() : value(0) {} + explicit constexpr Symbol(unique_t uniq) + : value(uniq) {} + + // Get a Symbol for a qualified string like "attr::bar" + static Symbol fromQualString(const std::string & s); + + // Get a Symbol from a domain and an unqualified string like "org.pytorch.attr" and "bar" + static Symbol fromDomainAndUnqualString(const std::string & d, const std::string & s); + + // Constructors for our various namespaced strings. This will construct + // the appropriate namespaced string, e.g., "attr::foo" for the + // argument "foo", and then attempt to intern it. DO NOT USE THIS + // with a string literal; attr::foo should be available in that case + // (and if it's not, you should add it to the built-ins list above.) + static Symbol attr(const std::string & s); + static Symbol aten(const std::string & s); + static Symbol cuda(const std::string & s); + static Symbol onnx(const std::string & s); + static Symbol prim(const std::string & s); + static Symbol user(const std::string & s); + static Symbol caffe2(const std::string & s); + static Symbol dimname(const std::string & s); + // TODO: eliminate me + static Symbol scope(const std::string & s); + + bool is_attr() const; + bool is_aten() const; + bool is_cuda() const; + bool is_prim() const; + bool is_prims() const; + bool is_nvprims() const; + bool is_onnx() const; + bool is_user() const; + bool is_caffe2() const; + bool is_dimname() const; + + // So we can switch on this + constexpr operator unique_t() const { + return value; + } + + Symbol ns() const; + + // Give a string corresponding to the unqualified version of this name, e.g., + // "mm". Use this in a context where the intended namespace of the string is + // obvious; this is a *lossy* conversion. + const char * toUnqualString() const; + + // Give a string corresponding to the qualified version of this name, + // e.g., "aten::mm". This string format is made available to Python bindings + // (so we know how to parse it.) + const char * toQualString() const; + + // This describes a symbol in a case where humans read it. At the moment it's + // the same as toQualString. This has to be a const char* returned because + // a lot of printf style macros use it. + const char * toDisplayString() const; + + // Give a string corresponding to the domain name for the symbol, + // e.g., "org.pytorch.aten". + std::string domainString() const; + +private: + + explicit Symbol(Symbol ns, const std::string & s); + unique_t value; +}; + +static inline bool operator==(Symbol lhs, Symbol rhs) { + return static_cast(lhs) == static_cast(rhs); +} + +inline Symbol Symbol::attr(const std::string & s) { return Symbol::fromQualString("attr::" + s); } +inline Symbol Symbol::aten(const std::string & s) { return Symbol::fromQualString("aten::" + s); } +inline Symbol Symbol::cuda(const std::string & s) { return Symbol::fromQualString("cuda::" + s); } +inline Symbol Symbol::onnx(const std::string & s) { return Symbol::fromQualString("onnx::" + s); } +inline Symbol Symbol::prim(const std::string & s) { return Symbol::fromQualString("prim::" + s); } +inline Symbol Symbol::scope(const std::string & s) { return Symbol::fromQualString("scope::" + s); } +inline Symbol Symbol::user(const std::string & s) { return Symbol::fromQualString("user::" + s); } +inline Symbol Symbol::caffe2(const std::string & s) { return Symbol::fromQualString("_caffe2::" + s); } +inline Symbol Symbol::dimname(const std::string & s) { return Symbol::fromQualString("dimname::" + s); } + +} // namespace c10 + +// make symbol behave like an integer in hash tables +namespace std { +template <> +struct hash { + size_t operator()(c10::Symbol s) const { + return std::hash()(static_cast(s)); + } +}; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/type_factory.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/type_factory.h new file mode 100644 index 0000000000000000000000000000000000000000..a0ffab285716c6b50546690bbd28c25ed2465db2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/type_factory.h @@ -0,0 +1,113 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace c10 { + +template +struct TORCH_API TypeFactoryBase {}; + +template <> +struct TORCH_API TypeFactoryBase { + template + static c10::DynamicTypePtr create(TypePtr ty, Args&&... args) { + return std::make_shared( + c10::DynamicTypeTrait::tagValue(), + c10::DynamicType::Arguments(c10::ArrayRef( + {std::move(ty), std::forward(args)...}))); + } + template + static c10::DynamicTypePtr create(const std::vector& types) { + return std::make_shared( + c10::DynamicTypeTrait::tagValue(), + c10::DynamicType::Arguments(types)); + } + static c10::DynamicTypePtr createNamedTuple( + const std::string& name, + const std::vector& fields, + const std::vector& types) { + return std::make_shared( + c10::DynamicType::Tag::Tuple, + name, + c10::DynamicType::Arguments(fields, types)); + } + template + C10_ERASE static c10::DynamicTypePtr createNamed(const std::string& name) { + return std::make_shared( + c10::DynamicTypeTrait::tagValue(), + name, + c10::DynamicType::Arguments{}); + } + template + C10_ERASE static decltype(auto) get() { + return DynamicTypeTrait::getBaseType(); + } + static const std::unordered_map& basePythonTypes(); +}; + +using DynamicTypeFactory = TypeFactoryBase; + +// Helper functions for constructing DynamicTypes inline. +template < + typename T, + std::enable_if_t::isBaseType, int> = 0> +C10_ERASE DynamicTypePtr dynT() { + return DynamicTypeFactory::get(); +} + +template < + typename T, + typename... Args, + std::enable_if_t::isBaseType, int> = 0> +C10_ERASE DynamicTypePtr dynT(Args&&... args) { + return DynamicTypeFactory::create(std::forward(args)...); +} + +template <> +struct TORCH_API TypeFactoryBase { + template + static c10::TypePtr create(TypePtr ty, Args&&... args) { + return T::create(std::move(ty), std::forward(args)...); + } + template + static c10::TypePtr create(std::vector types) { + return T::create(std::move(types)); + } + static c10::TypePtr createNamedTuple( + const std::string& name, + const std::vector& fields, + const std::vector& types); + template + C10_ERASE static c10::TypePtr createNamed(const std::string& name) { + return T::create(name); + } + static const std::unordered_map& basePythonTypes(); + template + C10_ERASE static c10::TypePtr get() { + return T::get(); + } +}; + +using DefaultTypeFactory = TypeFactoryBase; + +using PlatformType = +#ifdef C10_MOBILE + c10::DynamicType +#else + c10::Type +#endif + ; + +using TypeFactory = TypeFactoryBase; + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/type_ptr.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/type_ptr.h new file mode 100644 index 0000000000000000000000000000000000000000..5574060d262a7daef645775809f527fe2736b77f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/type_ptr.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace c10 { + +// Compatibility wrapper around a raw pointer so that existing code +// written to deal with a shared_ptr can keep working. +template +class SingletonTypePtr { + public: + /* implicit */ SingletonTypePtr(T* p) : repr_(p) {} + + // We need this to satisfy Pybind11, but it shouldn't be hit. + explicit SingletonTypePtr(std::shared_ptr /*unused*/) { TORCH_CHECK(false); } + + using element_type = typename std::shared_ptr::element_type; + + template , void>, bool> = true> + T& operator*() const { + return *repr_; + } + + T* get() const { + return repr_; + } + + T* operator->() const { + return repr_; + } + + operator bool() const { + return repr_ != nullptr; + } + + private: + T* repr_{nullptr}; +}; + +template +bool operator==(SingletonTypePtr lhs, SingletonTypePtr rhs) { + return (void*)lhs.get() == (void*)rhs.get(); +} + +template +bool operator!=(SingletonTypePtr lhs, SingletonTypePtr rhs) { + return !(lhs == rhs); +} + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/typeid.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/typeid.h new file mode 100644 index 0000000000000000000000000000000000000000..d355739ecf46b3eca4002f380bdbdbad70b33298 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/core/typeid.h @@ -0,0 +1,6 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ATenCUDAGeneral.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ATenCUDAGeneral.h new file mode 100644 index 0000000000000000000000000000000000000000..2363901b7dfab28b077df7bc77ae16c839b7614b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ATenCUDAGeneral.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +// Use TORCH_CUDA_CPP_API or TORCH_CUDA_CU_API for exports from this folder + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ApplyGridUtils.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ApplyGridUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..aa7533ac136233c2e307fcc044f7d5a26fce4ba0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ApplyGridUtils.cuh @@ -0,0 +1,52 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#include + +namespace at::cuda { + +/** + Computes ceil(a / b) +*/ +template +__host__ __device__ __forceinline__ T ATenCeilDiv(T a, T b) { + return (a + b - 1) / b; +} + +namespace { + +// Threads per block for our apply kernel +// FIXME: use occupancy calculator instead +constexpr uint32_t AT_APPLY_THREADS_PER_BLOCK = 512; +constexpr uint32_t AT_APPLY_BLOCKS_PER_SM = 4; + +template +inline bool getApplyGrid(uint64_t totalElements, dim3& grid, c10::DeviceIndex curDevice, int max_threads_per_block=AT_APPLY_THREADS_PER_BLOCK) { + if (curDevice == -1) return false; + uint64_t numel_per_thread = static_cast(max_threads_per_block) * static_cast(step); + uint64_t numBlocks = ATenCeilDiv(totalElements, numel_per_thread); + uint64_t maxGridX = at::cuda::getDeviceProperties(curDevice)->maxGridSize[0]; + if (numBlocks > maxGridX) + numBlocks = maxGridX; + grid = dim3(numBlocks); + return true; +} + +constexpr int getApplyBlocksPerSM() { + return AT_APPLY_BLOCKS_PER_SM; +} + +constexpr int getApplyBlockSize() { + return AT_APPLY_THREADS_PER_BLOCK; +} + +inline dim3 getApplyBlock(int max_threads_per_block=AT_APPLY_THREADS_PER_BLOCK) { + return dim3(max_threads_per_block); +} + +} // anonymous namespace +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/AsmUtils.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/AsmUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..d629ff658bb4cb2a1be10c3eb1d2e087c507ef7d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/AsmUtils.cuh @@ -0,0 +1,154 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +// Collection of direct PTX functions + +namespace at::cuda { + +template +struct Bitfield {}; + +template <> +struct Bitfield { + static __device__ __host__ __forceinline__ + unsigned int getBitfield(unsigned int val, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + unsigned int m = (1u << len) - 1u; + return (val >> pos) & m; +#else + unsigned int ret; + asm("bfe.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(val), "r"(pos), "r"(len)); + return ret; +#endif + } + + static __device__ __host__ __forceinline__ + unsigned int setBitfield(unsigned int val, unsigned int toInsert, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + unsigned int m = (1u << len) - 1u; + toInsert &= m; + toInsert <<= pos; + m <<= pos; + + return (val & ~m) | toInsert; +#else + unsigned int ret; + asm("bfi.b32 %0, %1, %2, %3, %4;" : + "=r"(ret) : "r"(toInsert), "r"(val), "r"(pos), "r"(len)); + return ret; +#endif + } +}; + +template <> +struct Bitfield { + static __device__ __host__ __forceinline__ + uint64_t getBitfield(uint64_t val, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + uint64_t m = (1u << len) - 1u; + return (val >> pos) & m; +#else + uint64_t ret; + asm("bfe.u64 %0, %1, %2, %3;" : "=l"(ret) : "l"(val), "r"(pos), "r"(len)); + return ret; +#endif + } + + static __device__ __host__ __forceinline__ + uint64_t setBitfield(uint64_t val, uint64_t toInsert, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + uint64_t m = (1u << len) - 1u; + toInsert &= m; + toInsert <<= pos; + m <<= pos; + + return (val & ~m) | toInsert; +#else + uint64_t ret; + asm("bfi.b64 %0, %1, %2, %3, %4;" : + "=l"(ret) : "l"(toInsert), "l"(val), "r"(pos), "r"(len)); + return ret; +#endif + } +}; + +__device__ __forceinline__ int getLaneId() { +#if defined(USE_ROCM) + return __lane_id(); +#else + int laneId; + asm("mov.s32 %0, %%laneid;" : "=r"(laneId) ); + return laneId; +#endif +} + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskLt() { + const std::uint64_t m = (1ull << getLaneId()) - 1ull; + return m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskLt() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_lt;" : "=r"(mask)); + return mask; +} +#endif + +#if defined (USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskLe() { + std::uint64_t m = UINT64_MAX >> (sizeof(std::uint64_t) * CHAR_BIT - (getLaneId() + 1)); + return m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskLe() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_le;" : "=r"(mask)); + return mask; +} +#endif + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskGt() { + const std::uint64_t m = getLaneMaskLe(); + return m ? ~m : m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskGt() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_gt;" : "=r"(mask)); + return mask; +} +#endif + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskGe() { + const std::uint64_t m = getLaneMaskLt(); + return ~m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskGe() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_ge;" : "=r"(mask)); + return mask; +} +#endif + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Atomic.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Atomic.cuh new file mode 100644 index 0000000000000000000000000000000000000000..df113e0b3b4b89d4785926444fa79ffccb23c921 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Atomic.cuh @@ -0,0 +1,530 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#if !(defined(USE_ROCM) || ((defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)))) +#include +#endif + +template +struct AtomicFPOp; + +template <> +struct AtomicFPOp { + template + inline __device__ at::Half operator() (at::Half *address, at::Half val, const func_t& func) { + unsigned int * address_as_ui = + (unsigned int *) ((char *)address - ((size_t)address & 2)); + unsigned int old = *address_as_ui; + unsigned int assumed; + + at::Half hsum; + do { + assumed = old; + hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + hsum = func(hsum, val); + old = (size_t)address & 2 ? (old & 0xffff) | (hsum.x << 16) : (old & 0xffff0000) | hsum.x; + old = atomicCAS(address_as_ui, assumed, old); + } while (assumed != old); + hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + return hsum; + } +}; + +template <> +struct AtomicFPOp { + template + inline __device__ at::BFloat16 operator() (at::BFloat16 *address, at::BFloat16 val, const func_t& func) { + unsigned int * address_as_ui = + (unsigned int *) ((char *)address - ((size_t)address & 2)); + unsigned int old = *address_as_ui; + unsigned int assumed; + + at::BFloat16 bsum; + do { + assumed = old; + bsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + bsum = func(bsum, val); + old = (size_t)address & 2 ? (old & 0xffff) | (bsum.x << 16) : (old & 0xffff0000) | bsum.x; + old = atomicCAS(address_as_ui, assumed, old); + } while (assumed != old); + bsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + return bsum.x; + } +}; + +template <> +struct AtomicFPOp { + template + inline __device__ double operator() (double * address, double val, const func_t& func) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull; + unsigned long long int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, func(val, assumed)); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); + } +}; + +#define ATOMIC_INTEGER_IMPL(NAME) \ +template \ +struct Atomic##NAME##IntegerImpl; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + size_t offset = (size_t)address & 3; \ + uint32_t * address_as_ui = (uint32_t *)((char *)address - offset); \ + uint32_t old = *address_as_ui; \ + uint32_t shift = offset * 8; \ + uint32_t old_byte; \ + uint32_t newval; \ + uint32_t assumed; \ + \ + do { \ + assumed = old; \ + old_byte = (old >> shift) & 0xff; \ + newval = static_cast(func(val, static_cast(old_byte))); \ + newval = (old & ~(0x000000ff << shift)) | (newval << shift); \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + size_t offset = (size_t)address & 2; \ + uint32_t * address_as_ui = (uint32_t *)((char *)address - offset); \ + bool is_32_align = offset; \ + uint32_t old = *address_as_ui; \ + uint32_t old_bytes; \ + uint32_t newval; \ + uint32_t assumed; \ + \ + do { \ + assumed = old; \ + old_bytes = is_32_align ? old >> 16 : old & 0xffff; \ + newval = static_cast(func(val, static_cast(old_bytes))); \ + newval = is_32_align ? (old & 0xffff) | (newval << 16) : (old & 0xffff0000) | newval; \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + uint32_t * address_as_ui = (uint32_t *) (address); \ + uint32_t old = *address_as_ui; \ + uint32_t newval; \ + uint32_t assumed; \ + \ + do { \ + assumed = old; \ + newval = static_cast(func(val, static_cast(old))); \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + unsigned long long * address_as_ui = (unsigned long long *) (address); \ + unsigned long long old = *address_as_ui; \ + unsigned long long newval; \ + unsigned long long assumed; \ + \ + do { \ + assumed = old; \ + newval = static_cast(func(val, static_cast(old))); \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; + + +# define GPU_ATOMIC_INTEGER(NAME, OP, DTYPE) \ +inline __device__ void gpuAtomic##NAME(DTYPE *address, DTYPE val) { \ +Atomic##NAME##IntegerImpl()(address, \ + val, \ + [](DTYPE a, DTYPE b) { \ + return OP; \ + }); \ +} \ + +ATOMIC_INTEGER_IMPL(Add) +GPU_ATOMIC_INTEGER(Add, a || b, bool) + +// Don't instantiate gpuAtomicAdd with the macro as it seems non-standard (see int32, int64) +inline __device__ void gpuAtomicAdd(uint8_t *address, uint8_t val) { + AtomicAddIntegerImpl()(address, + val, + [](uint8_t a, uint8_t b) { + return a + b; + }); +} + +inline __device__ void gpuAtomicAdd(int8_t *address, int8_t val) { + AtomicAddIntegerImpl()(address, + val, + [](int8_t a, int8_t b) { + return a + b; + }); +} + +inline __device__ void gpuAtomicAdd(int16_t *address, int16_t val) { + AtomicAddIntegerImpl()(address, + val, + [](int16_t a, int16_t b) { + return a + b; + }); +} + +inline __device__ int32_t gpuAtomicAdd(int32_t *address, int32_t val) { + return atomicAdd(address, val); +} + +inline __device__ void gpuAtomicAdd(int64_t *address, int64_t val) { +#if defined(USE_ROCM) + __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +#else + static_assert(sizeof(unsigned long long int) == sizeof(int64_t), "bitwidth change is not allowed"); + atomicAdd(reinterpret_cast(address), static_cast(val)); +#endif +} + +inline __device__ at::Half gpuAtomicAdd(at::Half *address, at::Half val) { +#if defined(USE_ROCM) || ((defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700))) + return AtomicFPOp()(address, val, + [](at::Half hsum, at::Half val) { + return hsum + val; + }); +#else + return atomicAdd(reinterpret_cast<__half*>(address), val); +#endif +} + +inline __device__ at::BFloat16 gpuAtomicAdd(at::BFloat16 *address, at::BFloat16 val) { +#if defined(USE_ROCM) || ((defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))) +return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return bsum + val; + }); +#else + __nv_bfloat16 r = atomicAdd(reinterpret_cast<__nv_bfloat16*>(address), *reinterpret_cast<__nv_bfloat16*>(&val)); + return *reinterpret_cast(&r); +#endif +} + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600) +// from CUDA C Programmic Guide +inline __device__ double atomicAdd(double* address, double val) +#if defined(__clang__) && defined(__CUDA__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wgcc-compat" + __attribute__((enable_if(true, ""))) +#pragma GCC diagnostic pop +#endif +{ + + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(val + __longlong_as_double(assumed)); + }); +} +#elif defined(USE_ROCM) || !(defined(__CUDA_ARCH__)) + +/* Note [hip-clang differences to hcc] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * The upcoming hip-clang compiler for ROCm differs from hcc in a few details. + * It exports the __HIP__ macro, we can hence differentiate between hcc and + * hip-clang. In the below, hcc only received support for atomicAdd with double + * typing after work week 18312. hip-clang had support from the first version. + * In general, the code-visible differences between hip-clang and hcc will be + * minimal. + */ + +#if defined(USE_ROCM) && __hcc_workweek__ < 18312 && !__HIP__ + // This needs to be defined for the host side pass + inline __device__ double atomicAdd(double *address, double val) { } +#endif +#endif + +inline __device__ double gpuAtomicAdd(double *address, double val) { + return atomicAdd(address, val); +} + +inline __device__ float gpuAtomicAdd(float *address, float val) { + return atomicAdd(address, val); +} + +template +inline __device__ void gpuAtomicAdd(c10::complex *address, c10::complex val) { + gpuAtomicAdd(&address->real_, val.real_); + gpuAtomicAdd(&address->imag_, val.imag_); +} + +/* Note [gpuAtomicAdd vs atomicAdd] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Some extensions such as torchvision call atomicAdd() + * directly and require non-library provided data type support. Only for these, we + * continue to provide atomicAdd overloads. + */ +inline __device__ at::Half atomicAdd(at::Half *address, at::Half val) { + return gpuAtomicAdd(address, val); +} + +inline __device__ at::BFloat16 atomicAdd(at::BFloat16 *address, at::BFloat16 val) { + return gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(uint8_t *address, uint8_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(int8_t *address, int8_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(int16_t *address, int16_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(int64_t *address, int64_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(bool *address, bool val) { + gpuAtomicAdd(address, val); +} + +/* Note [explicitly non-returning atomics] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * AMD's MI100 (gfx908) provides an optimized fp32 atomicAdd, exposed via atomicAddNoRet(). + * Due to compiler limitations, callers must opt-in to guarantee the optimized instruction. + * This non-returning atomicAddNoRet cannot be used to implement the returning atomicAdd, + * therefore we need a new API 'gpuAtomicAddNoReturn'. + */ +template +inline __device__ void gpuAtomicAddNoReturn(c10::complex *address, c10::complex val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(uint8_t *address, uint8_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int8_t *address, int8_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int16_t *address, int16_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int32_t *address, int32_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int64_t *address, int64_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(bool *address, bool val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(at::Half *address, at::Half val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(at::BFloat16 *address, at::BFloat16 val) { gpuAtomicAdd(address, val); } + +/* Note [HIP unsafeAtomicAdd] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Use unsafeAtomicAdd instead of atomicAdd for fp32 and fp64. + * On HIP, atomicAdd is always correct but is a slow CAS loop. + * unsafeAtomicAdd will use HW instructions and is much faster, + * but the caller must guarantee the pointer is GPU memory. + * If the pointer is system memory, the result is a silent no-op. + * This guarantee is upheld by all PyTorch uses of unsafeAtomicAdd. + * AMD HIP atomic header file is named amd_hip_atomic.h and is + * under the LLVM compiler directory. + */ +#if defined(USE_ROCM) +inline __device__ void gpuAtomicAddNoReturn(float *address, float val) { +#if defined(__gfx908__) + atomicAddNoRet(address, val); +#else + (void)unsafeAtomicAdd(address, val); +#endif +} +inline __device__ void gpuAtomicAddNoReturn(double *address, double val) { (void)unsafeAtomicAdd(address, val); } +#else +inline __device__ void gpuAtomicAddNoReturn(float *address, float val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(double *address, double val) { gpuAtomicAdd(address, val); } +#endif + +// Atomic multiplication implementation. + +ATOMIC_INTEGER_IMPL(Mul) +GPU_ATOMIC_INTEGER(Mul, a * b, uint8_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int8_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int16_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int32_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int64_t) + +inline __device__ at::Half gpuAtomicMul(at::Half * address, at::Half val) { + return AtomicFPOp()(address, val, + [](at::Half bsum, at::Half val) { + return bsum * val; + }); +} + +inline __device__ at::BFloat16 gpuAtomicMul(at::BFloat16 * address, at::BFloat16 val) { + return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return bsum * val; + }); +} + +inline __device__ double gpuAtomicMul(double * address, double val) { + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(val * __longlong_as_double(assumed)); + }); +} + +// Dont use a templated function for this since the addition function defaults to the CUDA built-in. +inline __device__ float gpuAtomicMul (float * address, float val) { + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull; + unsigned int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(val * + __int_as_float(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +} + +// Atomic maximum implementation. + +template +__host__ __device__ T safe_max(T a, T b) { + #if defined(__HIPCC__) + // TODO: remove this special case for HIP when issue is fixed: + // https://github.com/ROCm/hip/issues/2209 + T max = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::max(a, b)); + #else + T max = at::_isnan(b) ? b : std::max(a, b); + #endif + + return max; +} + +ATOMIC_INTEGER_IMPL(Max) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), uint8_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int8_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int16_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int32_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int64_t) + +inline __device__ at::Half gpuAtomicMax(at::Half * address, at::Half val) { + return AtomicFPOp()(address, val, + [](at::Half bsum, at::Half val) { + return safe_max(bsum, val); + }); +} + +inline __device__ at::BFloat16 gpuAtomicMax(at::BFloat16 * address, at::BFloat16 val) { + return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return safe_max(bsum, val); + }); +} + +inline __device__ double gpuAtomicMax(double * address, double val) { + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(safe_max(val, __longlong_as_double(assumed))); + }); +} + +// Dont use a templated function for this since the addition function defaults to the CUDA built-in. +inline __device__ float gpuAtomicMax(float * address, float val) { + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull; + unsigned int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(safe_max(val, __int_as_float(assumed)))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +} + +// Atomic minimum implementation. + +template +__host__ __device__ T safe_min(T a, T b) { + #if defined(__HIPCC__) + // TODO: remove this special case for HIP when issue is fixed: + // https://github.com/ROCm/hip/issues/2209 + T min = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::min(a, b)); + #else + T min = at::_isnan(b) ? b : std::min(a, b); + #endif + + return min; +} + +ATOMIC_INTEGER_IMPL(Min) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), uint8_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int8_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int16_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int32_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int64_t) + +inline __device__ at::Half gpuAtomicMin(at::Half * address, at::Half val) { + return AtomicFPOp()(address, val, + [](at::Half bsum, at::Half val) { + return safe_min(bsum, val); + }); +} + +inline __device__ at::BFloat16 gpuAtomicMin(at::BFloat16 * address, at::BFloat16 val) { + return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return safe_min(bsum, val); + }); +} + +inline __device__ double gpuAtomicMin(double * address, double val) { + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(safe_min(val, __longlong_as_double(assumed))); + }); +} + +// Dont use a templated function for this since the addition function defaults to the CUDA built-in. +inline __device__ float gpuAtomicMin(float * address, float val) { + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull; + unsigned int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(safe_min(val, __int_as_float(assumed)))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAApplyUtils.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAApplyUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..73ff79247312123b0cead7bd00734aa259ccd948 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAApplyUtils.cuh @@ -0,0 +1,542 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// +// This file contains pointwise operation functions and kernels that +// work on both contiguous and non-contiguous tensor arguments of +// arbitrary (up to MAX_CUTORCH_DIMS) dimensioned arguments without +// copying or temporary storage. +// + +/* + NOTE [ CUDA_tensor_applyN helpers ] + + The following CUDA_tensor_applyN (where N currently can be 1, 2, 3, or 4) + functions apply a pointwise operator to N tensor(s). + + The calling convention is + + 1. The template arguments should be, sequentially, + - First N typename args specify the scalar types of each of the N tensors. + - (Optional) `int step` arg specifies the number of elements processed + together at the same time. + Default is 1. + - A usually omitted (i.e., inferred) typename arg specifies the type of the + function/functor applied on `N * step` values in each iteration of each + CUDA thread. + 2. The arguments should be, sequentially, + - N tensors + - op: a function/functor that processes `N * step` values at the same time. + - If `step == 1`, it must have signature + `void(*)(scalar1_t&, scalar2_t&, ..., scalarN_t&)`, where + `scalar*_t`s are the first N typename template args, and the inputs + are the `N` values from the `N` tensors retrieved at a common index. + - Otherwise, it must must have signature + void(*)(int n, scalar1_t&, scalar1_t&, ..., scalar1_t&, // repeat `step` times + scalar2_t&, scalar2_t&, ..., scalar2_t&, // repeat `step` times + ..., + scalarN_t&, scalarN_t&, ..., scalarN_t&) // repeat `step` times + Different from `step == 1` case, it processes `N * step` values taken + from `step` common indices. Moreover, the first input `n` represents the + number of valid indices (it will always have `0 < n <= step`). It will + almost always be `step`, but at the boundary we may not have full `step` + elements and `n` can be a lesser value. + + E.g., if `step == 4` and `N == 2`, `op` could be + + [](int n, scalar1_t &u1, scalar1_t &u2, scalar1_t &u3, scalar1_t &u4, + scalar2_t &v1, scalar2_t &v2, scalar2_t &v3, scalar2_t &v4) { + // Only process u1, ..., un and v1, ..., vn. + // So if `n == 3`, `u4` and `v4` need not to be considered. + } + + In both cases, the references can actually be const, but at least one of + them should be non-const in order to write the output. + - (Optional, but recommended) N TensorArgType args that specify for each + tensor whether `op` reads AND writes ] (i.e., TensorArgType::ReadWrite), + or only reads (i.e., TensorArgType::ReadOnly). + Default is TensorArgType::ReadWrite for first Tensor, and + TensorArgType::ReadOnly for the rest. + + E.g., + + to compute a = b^2 for a and b of same dtype, we can call + + CUDA_tensor_apply2( + a, b, + [] __device__ (scalar &a_val, const scalar &b_val) { a_val = b_val * b_val; } + ); + + to work on 2 values at the same time, we can call + + CUDA_tensor_apply2( + a, b, + [] __device__ (int n, scalar1 &a_val1, scalar1 &a_val2, + const scalar2 &b_val1, const scalar2 &b_val2) { + // call special vectorized op here, or just do elementwise and enjoy unrolling... + // if n == 1, only process a_val1 and b_val1 + } + ); +*/ + +namespace at::cuda { + +// TODO: combine with TensorArg? So far that's been for debugging, and this is functional... +enum class TensorArgType { ReadWrite, ReadOnly }; + +namespace { + +// Rearrange dimensions for pointwise operations so that strides are in +// decreasing order as much as possible, so that kernels have better memory +// access patterns. +// +// For example, consider a binary operation on two "transposed" 2-dim tensors: +// sizes: 256 512 +// aInfo->strides: 1 256 +// bInfo->strides: 1 256 +// +// Given this, each concurrent memory access inside kernelPointwiseApply2() is +// exactly 256 elements apart, resulting in poor performance. +// +// This function exchanges dimensions so that memory access is contiguous: +// sizes: 512 256 +// aInfo->strides: 256 1 +// bInfo->strides: 256 1 +// +// (Actually, it becomes even better because now collapseDims() can turn each +// input into one contiguous array.) +// +// In general, given M (<=4) TensorInfo's with N dimensions, we can view each +// strides[i] (0 <= i < N) as an M-tuple. Given each pair i < j, we exchange +// strides[i] and [j] if +// (1) strides[i][k] < strides[j][k] for some k (0 <= k < M) +// (exchanging them will benefit input #k), and +// (2) strides[i][k] <= strieds[j][k] for all k +// (exchanging them will not make any input worse). +template +inline void rearrangeDims(detail::TensorInfo* aInfo, + detail::TensorInfo* bInfo = nullptr, + detail::TensorInfo* cInfo = nullptr, + detail::TensorInfo* dInfo = nullptr) { + int numInfos = 1; + int dims = aInfo->dims; + IndexType *sizes[4] = { aInfo->sizes, }; + IndexType *strides[4] = { aInfo->strides, }; + + if (bInfo != nullptr) { + ++numInfos; + if (bInfo->dims != dims) return; + sizes[1] = bInfo->sizes; + strides[1] = bInfo->strides; + } + + if (cInfo != nullptr) { + ++numInfos; + if (cInfo->dims != dims) return; + sizes[2] = cInfo->sizes; + strides[2] = cInfo->strides; + } + + if (dInfo != nullptr) { + ++numInfos; + if (dInfo->dims != dims) return; + sizes[3] = dInfo->sizes; + strides[3] = dInfo->strides; + } + + // Bail out if sizes do not match: we are using "deprecated pointwise + // behavior" among tensors of different shapes but same number of elements. + for (int i = 1; i < numInfos; ++i) { + for (int j = 0; j < dims; ++j) { + if (sizes[i][j] != sizes[0][j]) return; + } + } + + for (int i = 0; i < dims - 1; ++i) { + // No need to consider dimensions of size 1. + if (sizes[0][i] == 1) continue; + + for (int j = i + 1; j < dims; ++j) { + if (sizes[0][j] == 1) continue; + + // Compare the relative sizes of strides between dim #i and dim #j. + bool hasIncreasingStrides = false; + bool hasDecreasingStrides = false; + + for (int k = 0; k < numInfos; k++) { + IndexType stride_i = strides[k][i]; + IndexType stride_j = strides[k][j]; + if (stride_i < stride_j) { + hasIncreasingStrides = true; + } else if (stride_i > stride_j) { + hasDecreasingStrides = true; + } + } + + if (hasIncreasingStrides && !hasDecreasingStrides) { + for (int k = 0; k < numInfos; k++) { + IndexType size = sizes[k][i]; + sizes[k][i] = sizes[k][j]; + sizes[k][j] = size; + + IndexType stride = strides[k][i]; + strides[k][i] = strides[k][j]; + strides[k][j] = stride; + } + } + } + } +} + +// The `remaining_steps` argument is used to support Op that operates on +// multiple elements at the same time. Generally, the strategy of ApplyOpN is to +// 1. Initialize `remaining_steps = step`, where `step` is the template arg of +// CUDA_tensor_applyN helpers. The input arg `n` to `apply()` represents the +// number of elements in bound for this call. It will almost always equal to +// `step` except at boundaries. +// 2. If `remaining_steps > 0` convert the current linearIndex to offset (if in +// bound), and recursively call `ApplyOpN` with `remaining_steps - 1`. +// 3. At `remaining_steps = 0`, +// if `step = 1`, call `op(tensor1_val, tensor2_val, ...)`; +// if `step > 1`, call `op(n, tensor1_val1, tensor1_val2, ..., tesor1_valstep, +// tensor2_val1, tensor2_val2, ..., tesor2_valstep, +// ... +// tensorN_val1, tensorN_val2, ..., tesorN_valstep);` +// +// See NOTE [ CUDA_tensor_applyN helpers ] above for how Op may look like. + +template +struct ApplyOp1 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, const Op &op, int n, + IndexType linearIndex, Offsets... aOffsets) { + // Convert `linearIndex` into an offset of `a` + const IndexType aOffset = sizeof...(Offsets) < n ? + detail::IndexToOffset::get(linearIndex, a) : 0; + + ApplyOp1::apply( + a, op, n, linearIndex + 1, aOffsets..., aOffset + ); +} +}; + +// Specialize `step=1` case (i.e., `remaining_steps=0` and `len(Offsets)=1`). +// We don't need to pass in how many elements need to processed in this case. +template +struct ApplyOp1 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, const Op &op, + int n, IndexType linearIndex, Offset offset) { + op(a.data[offset]); +} +}; + +template +struct ApplyOp1 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, const Op &op, int n, + IndexType linearIndex, Offsets... offsets) { + op(n, a.data[offsets]...); +} +}; + +template +#if __CUDA_ARCH__ >= 350 || defined(USE_ROCM) +C10_LAUNCH_BOUNDS_2(AT_APPLY_THREADS_PER_BLOCK, AT_APPLY_BLOCKS_PER_SM) +#endif +__global__ void kernelPointwiseApply1(detail::TensorInfo a, + IndexType totalElements, const Op op) { + for (IndexType linearIndex = (blockIdx.x * blockDim.x + threadIdx.x) * step; + linearIndex < totalElements; + linearIndex += gridDim.x * blockDim.x * step) { + ApplyOp1::apply( + a, op, ::min(step, static_cast(totalElements - linearIndex)), linearIndex); + } +} + + +template +struct ApplyOp2 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, + detail::TensorInfo &b, + const Op &op, int64_t n, IndexType linearIndex, + Offsets... aOffsets, Offsets... bOffsets) { + // Convert `linearIndex` into an offset of `a` + const IndexType aOffset = static_cast(sizeof...(Offsets)) < n ? + detail::IndexToOffset::get(linearIndex, a) : 0; + + // Convert `linearIndex` into an offset of `b` + const IndexType bOffset = static_cast(sizeof...(Offsets)) < n ? + detail::IndexToOffset::get(linearIndex, b) : 0; + + ApplyOp2::apply( + a, b, op, n, linearIndex + 1, aOffsets..., aOffset, bOffsets..., bOffset + ); +} +}; + +// Specialize `step=1` case (i.e., `remaining_steps=0` and `len(Offsets)=1`). +// We don't need to pass in how many elements need to processed in this case. +template +struct ApplyOp2 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, + detail::TensorInfo &b, + const Op &op, int /*n*/, IndexType /*linearIndex*/, + Offset aOffset, Offset bOffset) { + op(a.data[aOffset], b.data[bOffset]); +} +}; + +template +struct ApplyOp2 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, + detail::TensorInfo &b, + const Op &op, int n, IndexType linearIndex, + Offsets... aOffsets, Offsets... bOffsets) { + op(n, a.data[aOffsets]..., b.data[bOffsets]...); +} +}; + +template +#if __CUDA_ARCH__ >= 350 || defined(USE_ROCM) +C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) +#endif +__global__ void +kernelPointwiseApply2(detail::TensorInfo a, + detail::TensorInfo b, + IndexType totalElements, + const Op op) { + for (IndexType linearIndex = (blockIdx.x * blockDim.x + threadIdx.x) * step; + linearIndex < totalElements; + linearIndex += gridDim.x * blockDim.x * step) { + ApplyOp2::apply( + a, b, op, ::min(step, static_cast(totalElements - linearIndex)), + linearIndex); + } +} + +} // anonymous namespace + +template +inline bool CUDA_tensor_apply2(at::TensorBase a, + at::TensorBase b, + const Op op, + TensorArgType aType = TensorArgType::ReadWrite, + TensorArgType bType = TensorArgType::ReadOnly) { + TORCH_CHECK(a.device().is_cuda() && b.device().is_cuda(), + "CUDA_tensor_apply2: Expected tensors to have CUDA DeviceType, but got " + "tensors with type ", a.device().type(), " and ", b.device().type()); + int64_t totalElements = a.numel(); + + if (totalElements != b.numel()) { + return false; + } + + if (a.dim() > MAX_TENSORINFO_DIMS || + b.dim() > MAX_TENSORINFO_DIMS) { + return false; + } + + if (a.numel() == 0) { + // Empty tensor; do nothing + return true; + } + const dim3 block = getApplyBlock(max_threads_per_block); + + dim3 grid; + auto curDevice = current_device(); + if (curDevice == -1) return false; + if (!getApplyGrid(totalElements, grid, curDevice, max_threads_per_block)) { + return false; + } + + /* + Expands readable/writable tensors whose indices may be "overlapped." + This ensures that each element of the tensor is operated on once and only + once. + */ + TensorBase oldA; + TensorBase oldB; + + if (aType == TensorArgType::ReadWrite && detail::maybeOverlappingIndices(a)) { + // Must perform in contiguous space + oldA = std::exchange(a, a.contiguous()); + } + if (bType == TensorArgType::ReadWrite && detail::maybeOverlappingIndices(b)) { + // Must perform in contiguous space + oldB = std::exchange(b, b.contiguous()); + } + + // It is possible that the tensor dimensions are able to be collapsed, + // and thus we can reduce the actual code complexity of the copy by + // exploiting this knowledge statically, since the div/mod is the + // most expensive part of the operation, more so than memory accesses. + // For instance, when copying a non-contiguous to a contiguous tensor + // (or vice versa), the contiguous tensor can be collapsed to one + // dimension, and the loop to translate the linear index to the array + // index can be similarly collapsed. That is what this unrolling is for. + +#define HANDLE_CASE(TYPE, A, B) \ + kernelPointwiseApply2 \ + <<>>( \ + aInfo, bInfo, static_cast(totalElements), op); \ + C10_CUDA_KERNEL_LAUNCH_CHECK(); + +#define HANDLE_B_CASE(TYPE, A, B) { \ + switch (B) { \ + case 1: \ + HANDLE_CASE(TYPE, A, 1); \ + break; \ + case 2: \ + HANDLE_CASE(TYPE, A, 2); \ + break; \ + default: \ + HANDLE_CASE(TYPE, A, -1); \ + break; \ + } \ +} + +#define HANDLE_A_CASE(TYPE, A, B) { \ + switch (A) { \ + case 1: \ + HANDLE_B_CASE(TYPE, 1, B); \ + break; \ + case 2: \ + HANDLE_B_CASE(TYPE, 2, B); \ + break; \ + default: \ + HANDLE_B_CASE(TYPE, -1, B); \ + break; \ + } \ +} + + if (detail::canUse32BitIndexMath(a) && + detail::canUse32BitIndexMath(b)) { + detail::TensorInfo aInfo = + detail::getTensorInfo(a); + + detail::TensorInfo bInfo = + detail::getTensorInfo(b); + rearrangeDims(&aInfo, &bInfo); + aInfo.collapseDims(); + bInfo.collapseDims(); + + HANDLE_A_CASE(unsigned int, aInfo.dims, bInfo.dims); + } else { + detail::TensorInfo aInfo = + detail::getTensorInfo(a); + + detail::TensorInfo bInfo = + detail::getTensorInfo(b); + rearrangeDims(&aInfo, &bInfo); + aInfo.collapseDims(); + bInfo.collapseDims(); + + /* + Only instantiates the all 1D special case and the fallback all nD case for + large (64-bit indexed) tensors to reduce compilation time. + */ + if (aInfo.dims == 1 && bInfo.dims == 1) { + HANDLE_CASE(uint64_t, 1, 1); + } else { + HANDLE_CASE(uint64_t, -1, -1); + } + } +#undef HANDLE_CASE +#undef HANDLE_B_CASE +#undef HANDLE_A_CASE + + if (oldA.defined()) { + at::native::copy_ignoring_overlaps(oldA, a); + } + + if (oldB.defined()) { + at::native::copy_ignoring_overlaps(oldB, b); + } + + return true; +} + +/* Provides default step = 1 to CUDA_tensor_apply2. */ +template +inline bool CUDA_tensor_apply2(const at::TensorBase &a, + const at::TensorBase &b, + const Op op, + TensorArgType aType = TensorArgType::ReadWrite, + TensorArgType bType = TensorArgType::ReadOnly) { + return CUDA_tensor_apply2(a, b, op, aType, bType); +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDABlas.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDABlas.h new file mode 100644 index 0000000000000000000000000000000000000000..4be9f1a3f05a4888597a7252d1e8d3c3a305bbe5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDABlas.h @@ -0,0 +1,398 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +/* + Provides a subset of CUDA BLAS functions as templates: + + gemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, + ldc) + + gemv(transa, m, n, alpha, a, lda, x, incx, beta, y, incy) + + dot(n, x, incx, y, incy, result) + + where Dtype is double, float, at::Half or at::BFloat16 (ROCm, NOT for dot). + The functions are available in at::cuda::blas namespace. + */ + +#include +#include +#include + +namespace at::cuda::blas { + +// RAII guard that sets the CuBLAS pointer mode and restores it to +// its previous value when the guard is destroyed +class PointerModeGuard { +public: + PointerModeGuard(cublasHandle_t handle, cublasPointerMode_t mode) : + handle(handle) { + TORCH_CUDABLAS_CHECK(cublasGetPointerMode(handle, &previous_mode)); + TORCH_CUDABLAS_CHECK(cublasSetPointerMode(handle, mode)); + } + + ~PointerModeGuard() { + cublasSetPointerMode(handle, previous_mode); + } + +private: + cublasHandle_t handle; + cublasPointerMode_t previous_mode{}; +}; + +/* LEVEL 3 BLAS FUNCTIONS */ + +#define CUDABLAS_GEMM_ARGTYPES(Dtype) CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, Dtype) + +#define CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype) \ + char transa, char transb, int64_t m, int64_t n, int64_t k, at::opmath_type alpha, \ + const Dtype *a, int64_t lda, const Dtype *b, int64_t ldb, at::opmath_type beta,\ + C_Dtype *c, int64_t ldc + +#define CUDABLAS_GEMM_ARGS(Dtype) transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc + +#define CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT \ + ((std::is_same::value || std::is_same::value) && std::is_same::value) + +template ::type* = nullptr> +inline void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::gemm: not implemented"); +} + +template ::type* = nullptr> +void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)); + +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(double)); +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(float)); +template <> +void gemm>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(at::Half)); +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); +template<> +void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +template +inline void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::gemm_internal: not implemented"); +} + +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)); +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)); +template <> +void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); +template<> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +enum GEMMAndBiasActivationEpilogue { + None, + RELU, + GELU, +}; + +// NOTE: GELU activation is not supported prior to CUDA 11.4 and will +// do nothing if passed in that case. +template +bool gemm_and_bias( + bool transpose_mat1, + bool transpose_mat2, + int64_t m, + int64_t n, + int64_t k, + at::opmath_type alpha_val, + const Dtype* mat1_ptr, + int64_t mat1_ld, + const Dtype* mat2_ptr, + int64_t mat2_ld, + const Dtype* bias, + C_Dtype* result_ptr, + int64_t result_ld, + GEMMAndBiasActivationEpilogue activation = GEMMAndBiasActivationEpilogue::None); + +void int8_gemm( + bool transpose_mat1, + bool transpose_mat2, + int64_t m, + int64_t n, + int64_t k, + const int8_t* mat1_ptr, + int64_t mat1_ld, + const int8_t* mat2_ptr, + int64_t mat2_ld, + int32_t* result_ptr, + int64_t result_ld); + +void scaled_gemm( + char transa, + char transb, + int64_t m, + int64_t n, + int64_t k, + const void* mat1_ptr, + const void* mat1_scale_ptr, + int64_t mat1_ld, + ScalarType mat1_dtype, + ScalarType mat1_scale_dtype, + at::blas::ScalingType mat1_scaling_type, + const void* mat2_ptr, + const void* mat2_scale_ptr, + int64_t mat2_ld, + ScalarType mat2_dtype, + ScalarType mat2_scale_dtype, + at::blas::ScalingType mat2_scaling_type, + const void* bias_ptr, + ScalarType bias_dtype, + void* result_ptr, + const void* result_scale_ptr, + int64_t result_ld, + ScalarType result_dtype, + bool use_fast_accum, + const std::optional& alpha); + +#define CUDABLAS_BGEMM_ARGTYPES(Dtype) CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, Dtype) + +#define CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype) \ + char transa, char transb, int64_t m, int64_t n, int64_t k, at::opmath_type alpha, \ + const Dtype *a, int64_t lda, int64_t stridea, \ + const Dtype *b, int64_t ldb, int64_t strideb, \ + at::opmath_type beta, C_Dtype *c, int64_t ldc, int64_t stridec, int64_t num_batches + +#define CUDABLAS_BGEMM_ARGS(Dtype) \ + transa, transb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, beta, c, ldc, stridec, num_batches + +template ::type* = nullptr> +inline void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::bgemm: not implemented"); +} + +template ::type* = nullptr> +void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)); + +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(double)); +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(float)); +template <> +void bgemm>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(at::Half)); +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)); +template<> +void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +template +inline void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::bgemm_internal: not implemented"); +} + +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(double)); +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(float)); +template <> +void bgemm_internal>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm_internal>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::Half)); +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)); +template<> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +#define CUDABLAS_TRSM_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, \ + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, \ + const Dtype *alpha, const Dtype *A, int lda, Dtype *B, int ldb + +template +inline void trsm(CUDABLAS_TRSM_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::trsm: not implemented"); +} + +template <> +TORCH_CUDA_CU_API void trsm(CUDABLAS_TRSM_ARGTYPES(float)); +template <> +TORCH_CUDA_CU_API void trsm(CUDABLAS_TRSM_ARGTYPES(double)); +template <> +TORCH_CUDA_CU_API void trsm>(CUDABLAS_TRSM_ARGTYPES(c10::complex)); +template <> +TORCH_CUDA_CU_API void trsm>(CUDABLAS_TRSM_ARGTYPES(c10::complex)); + +#define CUDABLAS_TRSM_BATCHED_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, \ + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, \ + const Dtype *alpha, Dtype *A[], int lda, Dtype *B[], int ldb, \ + int batchCount + +template +inline void trsmBatched(CUDABLAS_TRSM_BATCHED_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::trsmBatched: not implemented"); +} + +template <> +TORCH_CUDA_CU_API void trsmBatched(CUDABLAS_TRSM_BATCHED_ARGTYPES(float)); +template <> +TORCH_CUDA_CU_API void trsmBatched(CUDABLAS_TRSM_BATCHED_ARGTYPES(double)); +template <> +TORCH_CUDA_CU_API void trsmBatched>(CUDABLAS_TRSM_BATCHED_ARGTYPES(c10::complex)); +template <> +TORCH_CUDA_CU_API void trsmBatched>(CUDABLAS_TRSM_BATCHED_ARGTYPES(c10::complex)); + +/* LEVEL 2 BLAS FUNCTIONS */ + +#define CUDABLAS_GEMV_ARGTYPES(Dtype) \ + char trans, int64_t m, int64_t n, Dtype alpha, const Dtype *a, int64_t lda, \ + const Dtype *x, int64_t incx, Dtype beta, Dtype *y, int64_t incy + +template +inline void gemv(CUDABLAS_GEMV_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::gemv: not implemented"); +} + +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(double)); +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(float)); +template <> +void gemv>(CUDABLAS_GEMV_ARGTYPES(c10::complex)); +template <> +void gemv>(CUDABLAS_GEMV_ARGTYPES(c10::complex)); +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(at::Half)); +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(at::BFloat16)); + +/* LEVEL 1 BLAS FUNCTIONS */ + +#define CUDABLAS_DOT_ARGTYPES(Dtype) \ + cublasHandle_t handle, int n, const Dtype *x, int incx, const Dtype *y, \ + int incy, Dtype *result + +template +inline void dot(CUDABLAS_DOT_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::dot: not implemented"); +} + +template <> +void dot(CUDABLAS_DOT_ARGTYPES(double)); +template <> +void dot(CUDABLAS_DOT_ARGTYPES(float)); +template <> +void dot(CUDABLAS_DOT_ARGTYPES(at::Half)); +template <> +void dot(CUDABLAS_DOT_ARGTYPES(at::BFloat16)); +template <> +void dot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); +template <> +void dot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); + +template +inline void vdot(CUDABLAS_DOT_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::vdot: not implemented"); +} + +template <> +void vdot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); +template <> +void vdot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); + +#define CUDABLAS_GETRS_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasOperation_t trans, \ + int n, int nrhs, Dtype** dA_array, int lda, int* ipiv_array, \ + Dtype** dB_array, int ldb, int* info_array, int batchsize + +#define CUDABLAS_GEQRF_BATCHED_ARGTYPES(Dtype) \ + cublasHandle_t handle, int m, int n, Dtype **A_array, int lda, \ + Dtype **tau_array, int *info, int batchsize + +#define CUDABLAS_GETRF_ARGTYPES(Dtype) \ + int n, Dtype** dA_array, int ldda, int* ipiv_array, int* info_array, int batchsize + +#define CUDABLAS_GELS_BATCHED_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasOperation_t trans, \ + int m, int n, int nrhs, Dtype** dA_array, int ldda, \ + Dtype** dC_array, int lddc, int* info, int *devInfoArray, int batchSize + +template +void getrsBatched(CUDABLAS_GETRS_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::getrsBatched: not implemented"); +} +template<> +TORCH_CUDA_CU_API void getrsBatched(CUDABLAS_GETRS_ARGTYPES(float)); +template<> +TORCH_CUDA_CU_API void getrsBatched(CUDABLAS_GETRS_ARGTYPES(double)); +template<> +TORCH_CUDA_CU_API void getrsBatched>(CUDABLAS_GETRS_ARGTYPES(c10::complex)); +template<> +TORCH_CUDA_CU_API void getrsBatched>(CUDABLAS_GETRS_ARGTYPES(c10::complex)); + +template +void geqrfBatched(CUDABLAS_GEQRF_BATCHED_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::geqrfBatched: not implemented"); +} +template <> +TORCH_CUDA_CU_API void geqrfBatched(CUDABLAS_GEQRF_BATCHED_ARGTYPES(float)); +template <> +TORCH_CUDA_CU_API void geqrfBatched(CUDABLAS_GEQRF_BATCHED_ARGTYPES(double)); +template <> +TORCH_CUDA_CU_API void geqrfBatched>( + CUDABLAS_GEQRF_BATCHED_ARGTYPES(c10::complex)); +template <> +TORCH_CUDA_CU_API void geqrfBatched>( + CUDABLAS_GEQRF_BATCHED_ARGTYPES(c10::complex)); + +template +void getrfBatched(CUDABLAS_GETRF_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::getrfBatched: not implemented"); +} +template<> +TORCH_CUDA_CU_API void getrfBatched(CUDABLAS_GETRF_ARGTYPES(float)); +template<> +TORCH_CUDA_CU_API void getrfBatched(CUDABLAS_GETRF_ARGTYPES(double)); +template<> +TORCH_CUDA_CU_API void getrfBatched>(CUDABLAS_GETRF_ARGTYPES(c10::complex)); +template<> +TORCH_CUDA_CU_API void getrfBatched>(CUDABLAS_GETRF_ARGTYPES(c10::complex)); + +template +void gelsBatched(CUDABLAS_GELS_BATCHED_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::gelsBatched: not implemented"); +} +template<> +TORCH_CUDA_CU_API void gelsBatched(CUDABLAS_GELS_BATCHED_ARGTYPES(double)); +template<> +TORCH_CUDA_CU_API void gelsBatched(CUDABLAS_GELS_BATCHED_ARGTYPES(float)); +template<> +TORCH_CUDA_CU_API void gelsBatched>(CUDABLAS_GELS_BATCHED_ARGTYPES(c10::complex)); +template<> +TORCH_CUDA_CU_API void gelsBatched>(CUDABLAS_GELS_BATCHED_ARGTYPES(c10::complex)); + +} // namespace at::cuda::blas + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAConfig.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAConfig.h new file mode 100644 index 0000000000000000000000000000000000000000..6542be0367efa5eb4170cfea3aebfe58ab274c8f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAConfig.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// Test these using #if AT_CUDNN_ENABLED(), not #ifdef, so that it's +// obvious if you forgot to include Config.h +// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined +// +// NB: This header MUST NOT be included from other headers; it should +// only be included from C++ files. +#define AT_CUDNN_ENABLED() 1 +#define AT_CUSPARSELT_ENABLED() 1 +#define AT_HIPSPARSELT_ENABLED() 0 +#define AT_ROCM_ENABLED() 0 +#define AT_MAGMA_ENABLED() 1 + +// Needed for hipMAGMA to correctly identify implementation +#if (AT_ROCM_ENABLED() && AT_MAGMA_ENABLED()) +#define HAVE_HIP 1 +#endif + +#define NVCC_FLAGS_EXTRA "-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120" + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContext.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContext.h new file mode 100644 index 0000000000000000000000000000000000000000..debe2d8ba8e0806f4db9ff25f7b2874e6b369ddb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContext.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +// Preserved for BC, as many files depend on these includes +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContextLight.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContextLight.h new file mode 100644 index 0000000000000000000000000000000000000000..20f00cf2343a6128d7f92107fd8db98eac6d65d2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContextLight.h @@ -0,0 +1,116 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// Light-weight version of CUDAContext.h with fewer transitive includes + +#include +#include +#include + +#include +#include +#include + +// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also +// added bf16 support +#include + +#ifdef CUDART_VERSION +#include +#endif + +#if defined(USE_CUDSS) +#include +#endif + +#if defined(USE_ROCM) +#include +#endif + +#include +#include + +namespace c10 { +struct Allocator; +} + +namespace at::cuda { + +/* +A common CUDA interface for ATen. + +This interface is distinct from CUDAHooks, which defines an interface that links +to both CPU-only and CUDA builds. That interface is intended for runtime +dispatch and should be used from files that are included in both CPU-only and +CUDA builds. + +CUDAContext, on the other hand, should be preferred by files only included in +CUDA builds. It is intended to expose CUDA functionality in a consistent +manner. + +This means there is some overlap between the CUDAContext and CUDAHooks, but +the choice of which to use is simple: use CUDAContext when in a CUDA-only file, +use CUDAHooks otherwise. + +Note that CUDAContext simply defines an interface with no associated class. +It is expected that the modules whose functions compose this interface will +manage their own state. There is only a single CUDA context/state. +*/ + +/** + * DEPRECATED: use device_count() instead + */ +inline int64_t getNumGPUs() { + return c10::cuda::device_count(); +} + +/** + * CUDA is available if we compiled with CUDA, and there are one or more + * devices. If we compiled with CUDA but there is a driver problem, etc., + * this function will report CUDA is not available (rather than raise an error.) + */ +inline bool is_available() { + return c10::cuda::device_count() > 0; +} + +TORCH_CUDA_CPP_API cudaDeviceProp* getCurrentDeviceProperties(); + +TORCH_CUDA_CPP_API int warp_size(); + +TORCH_CUDA_CPP_API cudaDeviceProp* getDeviceProperties(c10::DeviceIndex device); + +TORCH_CUDA_CPP_API bool canDeviceAccessPeer( + c10::DeviceIndex device, + c10::DeviceIndex peer_device); + +TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); + +/* Handles */ +TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); +TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); +TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle(); + +TORCH_CUDA_CPP_API void clearCublasWorkspaces(); +struct WorkspaceMapWithMutex { + std::map, at::DataPtr> map; + std::shared_mutex mutex; +}; + +TORCH_CUDA_CPP_API WorkspaceMapWithMutex& cublas_handle_stream_to_workspace(); +TORCH_CUDA_CPP_API WorkspaceMapWithMutex& cublaslt_handle_stream_to_workspace(); +TORCH_CUDA_CPP_API size_t getChosenWorkspaceSize(); +TORCH_CUDA_CPP_API size_t getCUDABlasLtWorkspaceSize(); +TORCH_CUDA_CPP_API void* getCUDABlasLtWorkspace(); + +#if defined(CUDART_VERSION) || defined(USE_ROCM) +TORCH_CUDA_CPP_API cusolverDnHandle_t getCurrentCUDASolverDnHandle(); +#endif + +#if defined(USE_CUDSS) +TORCH_CUDA_CPP_API cudssHandle_t getCurrentCudssHandle(); +#endif + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADataType.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADataType.h new file mode 100644 index 0000000000000000000000000000000000000000..874979b76c7b95c86e8e69166239e4d3ae7375d1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADataType.h @@ -0,0 +1,107 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace at::cuda { + +template +cudaDataType getCudaDataType() { + static_assert(false && sizeof(scalar_t), "Cannot convert type to cudaDataType."); + return {}; +} + +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_16F; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_32F; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_64F; +} +template<> inline cudaDataType getCudaDataType>() { + return CUDA_C_16F; +} +template<> inline cudaDataType getCudaDataType>() { + return CUDA_C_32F; +} +template<> inline cudaDataType getCudaDataType>() { + return CUDA_C_64F; +} + +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_8U; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_8I; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_32I; +} + +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_16I; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_64I; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_16BF; +} + +inline cudaDataType ScalarTypeToCudaDataType(const c10::ScalarType& scalar_type) { + switch (scalar_type) { + case c10::ScalarType::Byte: + return CUDA_R_8U; + case c10::ScalarType::Char: + return CUDA_R_8I; + case c10::ScalarType::Int: + return CUDA_R_32I; + case c10::ScalarType::Half: + return CUDA_R_16F; + case c10::ScalarType::Float: + return CUDA_R_32F; + case c10::ScalarType::Double: + return CUDA_R_64F; + case c10::ScalarType::ComplexHalf: + return CUDA_C_16F; + case c10::ScalarType::ComplexFloat: + return CUDA_C_32F; + case c10::ScalarType::ComplexDouble: + return CUDA_C_64F; + case c10::ScalarType::Short: + return CUDA_R_16I; + case c10::ScalarType::Long: + return CUDA_R_64I; + case c10::ScalarType::BFloat16: + return CUDA_R_16BF; +#if !defined(USE_ROCM) || ROCM_VERSION >= 60300 + case c10::ScalarType::Float8_e4m3fn: + return CUDA_R_8F_E4M3; + case c10::ScalarType::Float8_e5m2: + return CUDA_R_8F_E5M2; +#endif +#if defined(USE_ROCM) + case c10::ScalarType::Float8_e4m3fnuz: + return HIP_R_8F_E4M3_FNUZ; + case c10::ScalarType::Float8_e5m2fnuz: + return HIP_R_8F_E5M2_FNUZ; +#endif +#if (defined(CUDA_VERSION) && CUDA_VERSION >= 12080) || (defined(USE_ROCM) && ROCM_VERSION >= 70000) + case c10::ScalarType::Float4_e2m1fn_x2: + return CUDA_R_4F_E2M1; +#endif + default: + TORCH_INTERNAL_ASSERT(false, "Cannot convert ScalarType ", scalar_type, " to cudaDataType.") + } +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADevice.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADevice.h new file mode 100644 index 0000000000000000000000000000000000000000..ef242a68e056e8feb2d30963ab975d61bf5eb756 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADevice.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace at::cuda { + +inline Device getDeviceFromPtr(void* ptr) { + cudaPointerAttributes attr{}; + + AT_CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr)); + +#if !defined(USE_ROCM) + TORCH_CHECK(attr.type != cudaMemoryTypeUnregistered, + "The specified pointer resides on host memory and is not registered with any CUDA device."); +#endif + + return {c10::DeviceType::CUDA, static_cast(attr.device)}; +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAEvent.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAEvent.h new file mode 100644 index 0000000000000000000000000000000000000000..4d8645caf3b69a06b33771beaea217bfb9e56db5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAEvent.h @@ -0,0 +1,336 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* +* `cudaEventExternal` is a torch-specific flag that is used to +* indicate that the CUDAEvent will be used only for synchronization +* with work outside of the cuda graph, rather than creation of +* cross-stream dependencies within a cuda graph. Resources: +* https://docs.nvidia.com/cuda/archive/12.9.0/cuda-c-programming-guide/index.html#cross-stream-dependencies-and-events +* https://docs.nvidia.com/cuda/archive/12.9.0/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3457b81d1d32c6a00f6132fbc2693d47 +* https://docs.nvidia.com/cuda/archive/12.9.0/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g0c23426b7252eaa9cef695859991304e +*/ +#define cudaEventExternal 0x08 + +namespace at::cuda { + +/* +* CUDAEvents are movable not copyable wrappers around CUDA's events. +* +* CUDAEvents are constructed lazily when first recorded unless it is +* reconstructed from a cudaIpcEventHandle_t. The event has a device, and this +* device is acquired from the first recording stream. However, if reconstructed +* from a handle, the device should be explicitly specified; or if ipc_handle() is +* called before the event is ever recorded, it will use the current device. +* Later streams that record the event must match this device. +*/ +struct TORCH_CUDA_CPP_API CUDAEvent { + // Constructors + // Default value for `flags` is specified below - it's cudaEventDisableTiming + CUDAEvent() noexcept = default; + CUDAEvent(unsigned int flags) noexcept : flags_{flags} {} + + CUDAEvent( + DeviceIndex device_index, const cudaIpcEventHandle_t* handle) : device_index_(device_index) { + CUDAGuard guard(device_index_); + + AT_CUDA_CHECK(cudaIpcOpenEventHandle(&event_, *handle)); + is_created_ = true; + } + + // Note: event destruction done on creating device to avoid creating a + // CUDA context on other devices. + ~CUDAEvent() { + try { + if (is_created_) { + CUDAGuard guard(device_index_); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_deletion(at::kCUDA, reinterpret_cast(event_)); + } + AT_CUDA_CHECK(cudaEventDestroy(event_)); + } + } catch (...) { /* No throw */ } + } + + CUDAEvent(const CUDAEvent&) = delete; + CUDAEvent& operator=(const CUDAEvent&) = delete; + + CUDAEvent(CUDAEvent&& other) noexcept { moveHelper(std::move(other)); } + CUDAEvent& operator=(CUDAEvent&& other) noexcept { + if (this != &other) { + moveHelper(std::move(other)); + } + return *this; + } + + operator cudaEvent_t() const { return event(); } + + // Less than operator (to allow use in sets) + friend bool operator<(const CUDAEvent& left, const CUDAEvent& right) { + return left.event_ < right.event_; + } + + std::optional device() const { + if (is_created_) { + return at::Device(at::kCUDA, device_index_); + } else { + return {}; + } + } + + bool isCreated() const { return is_created_; } + DeviceIndex device_index() const {return device_index_;} + cudaEvent_t event() const { return event_; } + + // Note: cudaEventQuery can be safely called from any device + bool query() const { + if (!is_created_) { + return true; + } + + cudaError_t err = cudaEventQuery(event_); + if (err == cudaSuccess) { + return true; + } else if (err != cudaErrorNotReady) { + C10_CUDA_CHECK(err); + } else { + // ignore and clear the error if not ready + (void)cudaGetLastError(); + } + + return false; + } + + void record() { record(getCurrentCUDAStream()); } + + void recordOnce(const CUDAStream& stream) { + if (!was_recorded_) record(stream); + } + + // Note: cudaEventRecord must be called on the same device as the event. + void record(const CUDAStream& stream) { + if (!is_created_) { + createEvent(stream.device_index()); + } + + TORCH_CHECK(device_index_ == stream.device_index(), "Event device ", device_index_, + " does not match recording stream's device ", stream.device_index(), "."); + CUDAGuard guard(device_index_); + +#ifndef USE_ROCM + // it is an error to use cudaEventRecordExternal when not doing stream capture + unsigned int flags = (c10::cuda::currentStreamCaptureStatusMayInitCtx() != c10::cuda::CaptureStatus::None && external_) ? cudaEventRecordExternal : cudaEventRecordDefault; + AT_CUDA_CHECK(cudaEventRecordWithFlags(event_, stream, flags)); +#else + AT_CUDA_CHECK(cudaEventRecord(event_, stream)); +#endif + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_record(at::kCUDA, + reinterpret_cast(event_), + reinterpret_cast(stream.stream()) + ); + } + was_recorded_ = true; + } + + // Note: cudaStreamWaitEvent must be called on the same device as the stream. + // The event has no actual GPU resources associated with it. + void block(const CUDAStream& stream) { + if (is_created_) { + CUDAGuard guard(stream.device_index()); +#ifndef USE_ROCM + // it is an error to use cudaEventWaitExternal when not doing stream capture + unsigned int flags = (c10::cuda::currentStreamCaptureStatusMayInitCtx() != c10::cuda::CaptureStatus::None && external_) ? cudaEventWaitExternal : cudaEventWaitDefault; + AT_CUDA_CHECK(cudaStreamWaitEvent(stream, event_, flags)); +#else + AT_CUDA_CHECK(cudaStreamWaitEvent(stream, event_)); +#endif + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_wait(at::kCUDA, + reinterpret_cast(event_), + reinterpret_cast(stream.stream()) + ); + } + } + } + + // Note: cudaEventElapsedTime can be safely called from any device + float elapsed_time(const CUDAEvent& other) const { + TORCH_CHECK_VALUE( + !(flags_ & cudaEventDisableTiming) && !(other.flags_ & cudaEventDisableTiming), + "Both events must be created with argument 'enable_timing=True'."); + TORCH_CHECK_VALUE( + is_created_ && other.isCreated(), + "Both events must be recorded before calculating elapsed time."); + TORCH_CHECK( + query() && other.query(), + "Both events must be completed before calculating elapsed time."); + + float time_ms = 0; + // We do not strictly have to set the device index to the same as our event, + // but if we don't and the current device is not initialized, it will + // create a new cuda context, which will consume a lot of memory. + CUDAGuard guard(device_index_); + // raise cudaErrorNotReady if either event is recorded but not yet completed + AT_CUDA_CHECK(cudaEventElapsedTime(&time_ms, event_, other.event_)); + return time_ms; + } + + // Note: cudaEventSynchronize can be safely called from any device + void synchronize() const { + if (is_created_) { + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_synchronization(at::kCUDA, reinterpret_cast(event_)); + } + AT_CUDA_CHECK(cudaEventSynchronize(event_)); + } + } + + // Note: cudaIpcGetEventHandle must be called on the same device as the event + void ipc_handle(cudaIpcEventHandle_t * handle) { + if (!is_created_) { + // this CUDAEvent object was initially constructed from flags but event_ + // is not created yet. + createEvent(getCurrentCUDAStream().device_index()); + } + CUDAGuard guard(device_index_); + AT_CUDA_CHECK(cudaIpcGetEventHandle(handle, event_)); + } + +private: + unsigned int flags_ = cudaEventDisableTiming; + bool is_created_ = false; + bool was_recorded_ = false; + bool external_ = false; + DeviceIndex device_index_ = -1; + cudaEvent_t event_{}; + + void createEvent(DeviceIndex device_index) { + external_ = (flags_ & cudaEventExternal) != 0; +#ifdef USE_ROCM + TORCH_CHECK(!external_, "External events are disallowed in rocm"); +#endif + flags_ &= ~cudaEventExternal; + device_index_ = device_index; + CUDAGuard guard(device_index_); + AT_CUDA_CHECK(cudaEventCreateWithFlags(&event_, flags_)); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_creation(at::kCUDA, reinterpret_cast(event_)); + } + is_created_ = true; + } + + void moveHelper(CUDAEvent&& other) { + // Transfer ownership of all state from other to this + flags_ = other.flags_; + is_created_ = other.is_created_; + was_recorded_ = other.was_recorded_; + external_ = other.external_; + device_index_ = other.device_index_; + event_ = other.event_; + + // Reset other to a valid empty state to prevent double-free + // The moved-from object must not attempt to destroy the event + other.is_created_ = false; + other.event_ = cudaEvent_t{}; + } +}; + +// EventPool - Thread-safe pool of CUDA events to avoid expensive cudaEventCreate +// calls. cudaEventCreate when concurrently invoked from multiple threads can be +// very expensive (especially on certain device/driver combinations). +using CUDAEventPtr = + std::unique_ptr>; + +class EventPool { + public: + EventPool() : pools_(at::cuda::device_count()) {} + + CUDAEventPtr get(const DeviceIndex device) { + // If the device is invalid, return a default event and no pooling + if (device < 0 || device >= (DeviceIndex)pools_.size()) { + auto deleter = [](CUDAEvent* event) { + delete event; + }; + return CUDAEventPtr( + std::make_unique(cudaEventDisableTiming).release(), deleter); + } + + auto& pool = pools_[device]; + + // Create a destructor that returns the event to the appropriate device pool + auto destructor = [&pool](CUDAEvent* event) noexcept { + if (event != nullptr) { + std::lock_guard lock(pool.mutex_); + pool.event_pool_.emplace_back(event); + } + }; + + { + std::lock_guard lock(pool.mutex_); + if (!pool.event_pool_.empty()) { + auto event = std::move(pool.event_pool_.back()); + pool.event_pool_.pop_back(); + return CUDAEventPtr(event.release(), destructor); + } + } + + return CUDAEventPtr( + std::make_unique(cudaEventDisableTiming).release(), + destructor); + } + + void empty_cache() { + for (auto& pool : pools_) { + std::lock_guard lock(pool.mutex_); + pool.event_pool_.clear(); + } + } + + void init_num_events(const size_t num_events) { + for (DeviceIndex device_idx = 0; device_idx < at::cuda::device_count(); ++device_idx) { + CUDAGuard device_guard(device_idx); + std::vector temp_events; + temp_events.reserve(num_events); + for (size_t i = 0; i < num_events; ++i) { + auto event = get(device_idx); + // Record the event to ensure it's properly initialized + event->record(); + temp_events.emplace_back(std::move(event)); + } + // Events will be returned to pool when temp_events is destroyed + } + } + + private: + struct alignas(64) PerDevicePool { + alignas(64) std::mutex mutex_; + std::vector> event_pool_; + }; + + std::vector pools_; +}; + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGeneratorImpl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGeneratorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..2955f6b8a5f3eb667b47dccdc1ae2e47e791cead --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGeneratorImpl.h @@ -0,0 +1,185 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +namespace at { + +namespace cuda { +struct CUDAGraph; +} + +/** + * Note [CUDA Graph-safe RNG states] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Strategy: + * ~~~~~~~~~ + * (It helps to look at + * cuda/detail/PhiloxCudaStateRaw.cuh and + * cuda/detail/UnpackRaw.cuh + * while you read this.) + * + * A CUDA graph containing multiple RNG ops behaves like a + * single giant kernel from the perspective of ops external + * to the graph. During graph capture, logic in CUDAGeneratorImpl + * records the total of all offset increments that occur in the + * graphed region, and records the final total as the offset for + * the entire graph. + * + * When the graph reruns, the logic that reruns it + * increments this device's CUDA generator's offset + * by that total. + * + * Meanwhile, within the graph, at capture time, instead of + * populating PhiloxCudaStates with the uint64_t offset pulled + * directly from the global state, PhiloxCudaState uses a pointer + * to a one-element stream-local int64_t device tensor + * holding an initial offset value, and a uint64_t holding an + * intra-graph offset. (The intra-graph offset starts from zero + * when capture begins.) In each consumer kernel, + * at::cuda::philox::unpack computes the offset to use for this kernel + * as intra-graph offset + *initial offset. + * + * When the graph reruns, the logic that reruns it first + * fill_s the initial offset tensor with this device's + * CUDA generator's current offset. + * + * The control flow above ensures graphed execution is bitwise + * identical to eager execution as long as RNG ops are enqueued + * from a single thread, even if RNG ops and graphs containing + * RNG ops are enqueued and run simultaneously on multiple streams. + * + * Usage: + * ~~~~~~ + * PhiloxCudaState in this file, and unpack() in + * cuda/CUDAGraphsUtils.cuh allow non-divergent use of + * CUDAGeneratorImpl whether graph capture is underway or not. + * + * Each PhiloxCudaState instance should be used for one and only one + * consumer kernel. + * + * Example (see e.g. native/cuda/Dropout.cu): + * + * #include + * #include + * + * __global__ void kernel(..., PhiloxCudaState philox_args) { + * auto seeds = at::cuda::philox::unpack(philox_args); + * IndexType idx = blockIdx.x * blockDim.x + threadIdx.x; + * curandStatePhilox4_32_10_t state; + * curand_init(std::get<0>(seeds), // seed + * idx, // per-thread subsequence + * std::get<1>(seeds), // offset in subsequence + * &state); + * ... + * } + * + * host_caller(...) { + * PhiloxCudaState rng_engine_inputs; + * { + * // See Note [Acquire lock when using random generators] + * std::lock_guard lock(gen->mutex_); + * + * // gen could be HostState or DevState here! No divergent code needed! + * rng_engine_inputs = gen->philox_cuda_state(offset_increment); + * } + * kernel<<<...>>>(..., rng_engine_inputs); + * } + * + */ + +struct CUDAGeneratorState : public c10::intrusive_ptr_target { + uint64_t seed_; + uint64_t philox_offset_per_thread_; + uint64_t offset_intragraph_; + bool capturing_{}; + std::unordered_set registered_graphs_; + at::TensorBase seed_extragraph_; + at::TensorBase offset_extragraph_; + + CUDAGeneratorState( + uint64_t seed = default_rng_seed_val, + uint64_t philox_offset_per_thread = 0, + uint64_t offset_intragraph = 0) + : seed_(seed), + philox_offset_per_thread_(philox_offset_per_thread), + offset_intragraph_(offset_intragraph) {} + + void increase(uint64_t increment); + + void register_graph(cuda::CUDAGraph* graph); + void unregister_graph(cuda::CUDAGraph* graph); + + void capture_prologue(); + // capture_epilogue returns the wholegraph_increment + uint64_t capture_epilogue(); + void replay_prologue(uint64_t wholegraph_increment); + c10::intrusive_ptr clone(); +}; + +struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl { + // Constructors + CUDAGeneratorImpl(DeviceIndex device_index = -1); + CUDAGeneratorImpl( + DeviceIndex device_index, + c10::intrusive_ptr state_); + ~CUDAGeneratorImpl() override = default; + + // CUDAGeneratorImpl methods + std::shared_ptr clone() const; + void set_current_seed(uint64_t seed) override; + void set_offset(uint64_t offset) override; + uint64_t get_offset() const override; + uint64_t current_seed() const override; + uint64_t seed() override; + void set_state(const c10::TensorImpl& new_state) override; + c10::intrusive_ptr get_state() const override; + void graphsafe_set_state( + const c10::intrusive_ptr& state) override; + c10::intrusive_ptr graphsafe_get_state() const override; + + void set_philox_offset_per_thread(uint64_t offset); + uint64_t philox_offset_per_thread() const; + + void register_graph(cuda::CUDAGraph* graph); + void unregister_graph(cuda::CUDAGraph* graph); + + // Generates a PhiloxCudaState with a specified increment, and increment + // current state + PhiloxCudaState philox_cuda_state(uint64_t increment); + + bool reset_rnn_state() { + return !no_reset_rnn_state_.test_and_set(); + } + + // Temporarily accommodates call sites that use philox_engine_inputs. + // Allows incremental refactor of call sites to use philox_cuda_state. + std::pair philox_engine_inputs(uint64_t increment); + + static c10::DeviceType device_type(); + + private: + CUDAGeneratorImpl* clone_impl() const override; + + c10::intrusive_ptr state_; + std::atomic_flag no_reset_rnn_state_; +}; + +namespace cuda::detail { + +TORCH_CUDA_CPP_API const Generator& getDefaultCUDAGenerator( + DeviceIndex device_index = -1); +TORCH_CUDA_CPP_API Generator createCUDAGenerator(DeviceIndex device_index = -1); + +} // namespace cuda::detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraph.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraph.h new file mode 100644 index 0000000000000000000000000000000000000000..9687b067d571c42f3ea5d6a417ec700b7dc122d0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraph.h @@ -0,0 +1,100 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace at { + +struct Generator; +struct CUDAGeneratorImpl; +struct CUDAGeneratorState; + +namespace cuda { + +// Standalone way to get a unique mempool id usable as a pool=... argument +// to CUDAGraph::capture_begin +TORCH_CUDA_CPP_API MempoolId_t graph_pool_handle(); + +struct TORCH_CUDA_CPP_API CUDAGraph { + CUDAGraph(bool keep_graph=false); + ~CUDAGraph(); + + // See Note [Explicit Registration of Generators to the CUDA Graph] + void register_generator_state(c10::intrusive_ptr state); + void register_generator_state(const at::Generator& generator); + void capture_begin( + MempoolId_t pool = {0, 0}, + cudaStreamCaptureMode capture_mode = cudaStreamCaptureModeGlobal); + void capture_end(); + void instantiate(); + void replay(); + void reset(); + MempoolId_t pool(); + void enable_debug_mode(); + void debug_dump(const std::string& debug_path); + cudaGraph_t raw_cuda_graph(); + cudaGraphExec_t raw_cuda_graph_exec(); + + protected: + cudaGraph_t graph_ = nullptr; + cudaGraphExec_t graph_exec_ = nullptr; + + // internal states so reset() can do its best cleaning up + + // Set to true in capture_end if cudaStreamEndCapture succeeded + // Set back to false after instantiate() unless keep_graph=True or + // enable_debug_mode() was called on any CUDAGraph instance. + bool has_graph_ = false; + // Set to true in capture_end if cudaStreamEndCapture succeeded + bool capture_ended_ = false; + // Set to true in capture_end if cudaGraphInstantiate succeeded + bool has_graph_exec_ = false; + + // the ID assigned by cuda during graph capture, + // used to identify when a stream is participating in capture + CaptureId_t capture_id_ = 0; + + // uuid used to request a particular private mempool from CUDACachingAllocator. + // By default, this will be set to {id_, 0}. + // + // If capture_begin is called with "pool=other_graph.pool()", this graph's mempool_id_ + // will be set to the other graph's mempool_id_, and therefore share a mempool with the + // other graph. + // + // If capture_begin is called with "pool=handle" where "handle" came from graph_pool_handle(), + // it will share a mempool with any other captures that used "pool=handle". + // + // Sharing a mempool across graphs saves memory, and it's safe if you + // know you'll replay those graphs in the same order you captured them. + MempoolId_t mempool_id_; + + // Stream on which capture began + at::cuda::CUDAStream capture_stream_; + + // multiple generator states and their wholegraph_increments in this graph + // that are managed by the CUDA Graph + ska::flat_hash_map, uint64_t> + captured_generator_states_; + + // Device where capture occurred. Right now, for simplicity, we require all ops + // in a capture to run on the same device, but this is a limitation of CUDAGraph, + // not CUDA itself. We can straightforwardly modify CUDAGraph to support multi-device + // captures if needed. + // init capture_dev_ as UNDEFINED_DEVICE to check that it stores the real device id in the destructor + static constexpr c10::DeviceIndex UNDEFINED_DEVICE = -1; + c10::DeviceIndex capture_dev_{UNDEFINED_DEVICE}; + + bool keep_graph_; +}; + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraphsUtils.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraphsUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..5ddebd32b16f3e0324563cc5ad25f59c493323bf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraphsUtils.cuh @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +// c10/cuda/CUDAGraphsC10Utils.h has utils used by both c10 and aten. +// This file adds utils used by aten only. + +namespace at::cuda { + +using CaptureId_t = c10::cuda::CaptureId_t; +using CaptureStatus = c10::cuda::CaptureStatus; + +// Use this version where you don't want to create a CUDA context if none exists. +inline CaptureStatus currentStreamCaptureStatus() { + // don't create a context if we don't have to + if (c10::cuda::hasPrimaryContext(c10::cuda::current_device())) { + return c10::cuda::currentStreamCaptureStatusMayInitCtx(); + } else { + return CaptureStatus::None; + } +} + +inline void assertNotCapturing(const std::string& attempt) { + auto status = currentStreamCaptureStatus(); + TORCH_CHECK(status == CaptureStatus::None, + attempt, + " during CUDA graph capture. If you need this call to be captured, " + "please file an issue. " + "Current cudaStreamCaptureStatus: ", + status); +} + +inline void errorIfCapturingCudnnBenchmark(const std::string& version_specific) { + auto status = currentStreamCaptureStatus(); + TORCH_CHECK(status == CaptureStatus::None, + "Current cudaStreamCaptureStatus: ", + status, + "\nCapturing ", + version_specific, + "is prohibited. Possible causes of this error:\n" + "1. No warmup iterations occurred before capture.\n" + "2. The convolutions you're trying to capture use dynamic shapes, " + "in which case capturing them is generally prohibited."); +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGreenContext.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGreenContext.h new file mode 100644 index 0000000000000000000000000000000000000000..3777a1938f43d5bcf5598d7c08e98f361b60d2b6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGreenContext.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +// Forward declare green context as opaque ptr +typedef struct CUgreenCtx_st* CUgreenCtx; + +namespace at::cuda { + +class TORCH_CUDA_CPP_API GreenContext { + public: + // Green context creation + static std::unique_ptr create( + uint32_t num_sms, + std::optional device_id); + ~GreenContext() noexcept; + + // Delete copy constructor and assignment + GreenContext(const GreenContext&) = delete; + GreenContext& operator=(const GreenContext&) = delete; + + // Make this context current + void setContext(); + + void popContext(); + + private: + GreenContext(uint32_t device_id, uint32_t num_sms); + // Implement move operations + GreenContext(GreenContext&& other) noexcept; + GreenContext& operator=(GreenContext&& other) noexcept; + + int32_t device_id_ = -1; + CUgreenCtx green_ctx_ = nullptr; + CUcontext context_ = nullptr; + cudaStream_t parent_stream_ = nullptr; +}; +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAScaledBlas.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAScaledBlas.h new file mode 100644 index 0000000000000000000000000000000000000000..9e4cfcb3602e14f31d8355d447361f1487c4e1dc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAScaledBlas.h @@ -0,0 +1,179 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include +#include +#include +#include +#include +#define TORCH_ASSERT_ONLY_METHOD_OPERATORS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_FBGEMM_GENAI +#include +#endif + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#include +#else +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +using at::blas::ScalingType; +using at::blas::SwizzleType; + +namespace at::cuda::scaled { + +static bool _scaled_mm_allowed_device(bool sm90_only=false, bool sm100_only=false) { +#ifdef USE_ROCM + static const std::vector archs = { + "gfx942", +#if ROCM_VERSION >= 60300 + "gfx1200", "gfx1201", +#endif +#if ROCM_VERSION >= 60500 + "gfx950" +#endif + }; + return at::detail::getCUDAHooks().isGPUArch(archs); +#else + auto dprops = at::cuda::getCurrentDeviceProperties(); + + if (sm90_only || sm100_only) { + return (sm90_only && dprops->major == 9) || (sm100_only && dprops->major == 10); + } else { + return dprops->major >= 9 || (dprops->major == 8 && dprops->minor == 9); + } +#endif +} + +#ifdef USE_ROCM +static bool _scaled_mm_is_fnuz() { + return at::detail::getCUDAHooks().isGPUArch({"gfx942"}); +} +#endif +/** + * Track concrete implementations available + */ +enum class ScaledGemmImplementation { + NONE = 0, + TENSORWISE_TENSORWISE = 1, + ROWWISE_ROWWISE = 2, + BLOCK_128x128_1x128 = 3, + BLOCK_1x128_128x128 = 4, + BLOCK_1x128_1x128 = 5, + MXFP8_MXFP8 = 6, + NVFP4_NVFP4 = 7, + NVFP4_NVFP4_SINGLE_SCALE = 8, + MXFP4_MXFP4 = 9, +}; + +/** + * Convert passed int (enum) from python back into a + * strictly-typed enum + */ +template +std::vector convert_int_to_enum(ArrayType& v) { + std::vector converted; + converted.reserve(v.size()); + + for (auto vi : v) { + converted.push_back(static_cast(vi)); + } + return converted; +} + +bool check_tensorwise_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + + +bool check_rowwise_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_nvfp4_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_nvfp4_recipe_single_scale + (c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_deepseek_recipe(ScalingType, + ScalingType, + c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_mxfp8_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_mxfp4_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +} // namespace at::native::cuda::blas::scaled + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparse.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparse.h new file mode 100644 index 0000000000000000000000000000000000000000..0559bd1ec2aaf8133c695488538825021fa83a35 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparse.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#if defined(USE_ROCM) +#include +#define HIPSPARSE_VERSION ((hipsparseVersionMajor*100000) + (hipsparseVersionMinor*100) + hipsparseVersionPatch) +#endif + + +// cuSparse Generic API spsv function was added in CUDA 11.3.0 +#if defined(CUDART_VERSION) && defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= 11500) +#define AT_USE_CUSPARSE_GENERIC_SPSV() 1 +#else +#define AT_USE_CUSPARSE_GENERIC_SPSV() 0 +#endif + +// cuSparse Generic API spsm function was added in CUDA 11.3.1 +#if defined(CUDART_VERSION) && defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= 11600) +#define AT_USE_CUSPARSE_GENERIC_SPSM() 1 +#else +#define AT_USE_CUSPARSE_GENERIC_SPSM() 0 +#endif + +// cuSparse Generic API sddmm function was added in CUDA 11.2.1 (cuSparse version 11400) +#if defined(CUDART_VERSION) && defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= 11400) +#define AT_USE_CUSPARSE_GENERIC_SDDMM() 1 +#else +#define AT_USE_CUSPARSE_GENERIC_SDDMM() 0 +#endif + +// BSR triangular solve functions were added in hipSPARSE 1.11.2 (ROCm 4.5.0) +#if defined(CUDART_VERSION) || defined(USE_ROCM) +#define AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() 1 +#else +#define AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() 0 +#endif + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseBlas.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseBlas.h new file mode 100644 index 0000000000000000000000000000000000000000..8ffcdad0b58aa52b696a7ec0b1928e4b86c3865a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseBlas.h @@ -0,0 +1,325 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +/* + Provides a subset of cuSPARSE functions as templates: + + csrgeam2(...) + + where scalar_t is double, float, c10::complex or c10::complex. + The functions are available in at::cuda::sparse namespace. +*/ + +#include +#include + +// NOLINTBEGIN(misc-misplaced-const) +namespace at::cuda::sparse { + +#define CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, int m, int n, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, int nnzA, \ + const scalar_t *csrSortedValA, const int *csrSortedRowPtrA, \ + const int *csrSortedColIndA, const scalar_t *beta, \ + const cusparseMatDescr_t descrB, int nnzB, \ + const scalar_t *csrSortedValB, const int *csrSortedRowPtrB, \ + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, \ + const scalar_t *csrSortedValC, const int *csrSortedRowPtrC, \ + const int *csrSortedColIndC, size_t *pBufferSizeInBytes + +template +inline void csrgeam2_bufferSizeExt( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::csrgeam2_bufferSizeExt: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void csrgeam2_bufferSizeExt( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(float)); +template <> +void csrgeam2_bufferSizeExt( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(double)); +template <> +void csrgeam2_bufferSizeExt>( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(c10::complex)); +template <> +void csrgeam2_bufferSizeExt>( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(c10::complex)); + +#define CUSPARSE_CSRGEAM2_NNZ_ARGTYPES() \ + cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, \ + int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, \ + const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, \ + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, \ + int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace + +template +inline void csrgeam2Nnz(CUSPARSE_CSRGEAM2_NNZ_ARGTYPES()) { + TORCH_CUDASPARSE_CHECK(cusparseXcsrgeam2Nnz( + handle, + m, + n, + descrA, + nnzA, + csrSortedRowPtrA, + csrSortedColIndA, + descrB, + nnzB, + csrSortedRowPtrB, + csrSortedColIndB, + descrC, + csrSortedRowPtrC, + nnzTotalDevHostPtr, + workspace)); +} + +#define CUSPARSE_CSRGEAM2_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, int m, int n, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, int nnzA, \ + const scalar_t *csrSortedValA, const int *csrSortedRowPtrA, \ + const int *csrSortedColIndA, const scalar_t *beta, \ + const cusparseMatDescr_t descrB, int nnzB, \ + const scalar_t *csrSortedValB, const int *csrSortedRowPtrB, \ + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, \ + scalar_t *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, \ + void *pBuffer + +template +inline void csrgeam2(CUSPARSE_CSRGEAM2_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::csrgeam2: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void csrgeam2(CUSPARSE_CSRGEAM2_ARGTYPES(float)); +template <> +void csrgeam2(CUSPARSE_CSRGEAM2_ARGTYPES(double)); +template <> +void csrgeam2>( + CUSPARSE_CSRGEAM2_ARGTYPES(c10::complex)); +template <> +void csrgeam2>( + CUSPARSE_CSRGEAM2_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRMM_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, \ + int kb, int nnzb, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + const scalar_t *B, int ldb, const scalar_t *beta, scalar_t *C, int ldc + +template +inline void bsrmm(CUSPARSE_BSRMM_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrmm: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrmm(CUSPARSE_BSRMM_ARGTYPES(float)); +template <> +void bsrmm(CUSPARSE_BSRMM_ARGTYPES(double)); +template <> +void bsrmm>(CUSPARSE_BSRMM_ARGTYPES(c10::complex)); +template <> +void bsrmm>(CUSPARSE_BSRMM_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRMV_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nb, int nnzb, \ + const scalar_t *alpha, const cusparseMatDescr_t descrA, \ + const scalar_t *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, \ + int blockDim, const scalar_t *x, const scalar_t *beta, scalar_t *y + +template +inline void bsrmv(CUSPARSE_BSRMV_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrmv: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrmv(CUSPARSE_BSRMV_ARGTYPES(float)); +template <> +void bsrmv(CUSPARSE_BSRMV_ARGTYPES(double)); +template <> +void bsrmv>(CUSPARSE_BSRMV_ARGTYPES(c10::complex)); +template <> +void bsrmv>(CUSPARSE_BSRMV_ARGTYPES(c10::complex)); + +#if AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() + +#define CUSPARSE_BSRSV2_BUFFER_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nnzb, \ + const cusparseMatDescr_t descrA, scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsv2Info_t info, int *pBufferSizeInBytes + +template +inline void bsrsv2_bufferSize(CUSPARSE_BSRSV2_BUFFER_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsv2_bufferSize: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsv2_bufferSize(CUSPARSE_BSRSV2_BUFFER_ARGTYPES(float)); +template <> +void bsrsv2_bufferSize(CUSPARSE_BSRSV2_BUFFER_ARGTYPES(double)); +template <> +void bsrsv2_bufferSize>( + CUSPARSE_BSRSV2_BUFFER_ARGTYPES(c10::complex)); +template <> +void bsrsv2_bufferSize>( + CUSPARSE_BSRSV2_BUFFER_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nnzb, \ + const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsv2_analysis(CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsv2_analysis: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsv2_analysis(CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(float)); +template <> +void bsrsv2_analysis(CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(double)); +template <> +void bsrsv2_analysis>( + CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(c10::complex)); +template <> +void bsrsv2_analysis>( + CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSV2_SOLVE_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nnzb, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsv2Info_t info, const scalar_t *x, scalar_t *y, \ + cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsv2_solve(CUSPARSE_BSRSV2_SOLVE_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsv2_solve: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsv2_solve(CUSPARSE_BSRSV2_SOLVE_ARGTYPES(float)); +template <> +void bsrsv2_solve(CUSPARSE_BSRSV2_SOLVE_ARGTYPES(double)); +template <> +void bsrsv2_solve>( + CUSPARSE_BSRSV2_SOLVE_ARGTYPES(c10::complex)); +template <> +void bsrsv2_solve>( + CUSPARSE_BSRSV2_SOLVE_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSM2_BUFFER_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, cusparseOperation_t transX, int mb, int n, \ + int nnzb, const cusparseMatDescr_t descrA, scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsm2Info_t info, int *pBufferSizeInBytes + +template +inline void bsrsm2_bufferSize(CUSPARSE_BSRSM2_BUFFER_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsm2_bufferSize: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsm2_bufferSize(CUSPARSE_BSRSM2_BUFFER_ARGTYPES(float)); +template <> +void bsrsm2_bufferSize(CUSPARSE_BSRSM2_BUFFER_ARGTYPES(double)); +template <> +void bsrsm2_bufferSize>( + CUSPARSE_BSRSM2_BUFFER_ARGTYPES(c10::complex)); +template <> +void bsrsm2_bufferSize>( + CUSPARSE_BSRSM2_BUFFER_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, cusparseOperation_t transX, int mb, int n, \ + int nnzb, const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsm2_analysis(CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsm2_analysis: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsm2_analysis(CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(float)); +template <> +void bsrsm2_analysis(CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(double)); +template <> +void bsrsm2_analysis>( + CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(c10::complex)); +template <> +void bsrsm2_analysis>( + CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSM2_SOLVE_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, cusparseOperation_t transX, int mb, int n, \ + int nnzb, const scalar_t *alpha, const cusparseMatDescr_t descrA, \ + const scalar_t *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, \ + int blockDim, bsrsm2Info_t info, const scalar_t *B, int ldb, \ + scalar_t *X, int ldx, cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsm2_solve(CUSPARSE_BSRSM2_SOLVE_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsm2_solve: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsm2_solve(CUSPARSE_BSRSM2_SOLVE_ARGTYPES(float)); +template <> +void bsrsm2_solve(CUSPARSE_BSRSM2_SOLVE_ARGTYPES(double)); +template <> +void bsrsm2_solve>( + CUSPARSE_BSRSM2_SOLVE_ARGTYPES(c10::complex)); +template <> +void bsrsm2_solve>( + CUSPARSE_BSRSM2_SOLVE_ARGTYPES(c10::complex)); + +#endif // AT_USE_HIPSPARSE_TRIANGULAR_SOLVE + +} // namespace at::cuda::sparse +// NOLINTEND(misc-misplaced-const) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseDescriptors.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseDescriptors.h new file mode 100644 index 0000000000000000000000000000000000000000..e70e503fa8fc95cac7592f48bc965040d50fbf67 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseDescriptors.h @@ -0,0 +1,257 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#if defined(USE_ROCM) +#include +#endif + +namespace at::cuda::sparse { + +template +struct CuSparseDescriptorDeleter { + void operator()(T* x) { + if (x != nullptr) { + TORCH_CUDASPARSE_CHECK(destructor(x)); + } + } +}; + +template +class CuSparseDescriptor { + public: + T* descriptor() const { + return descriptor_.get(); + } + T* descriptor() { + return descriptor_.get(); + } + + protected: + std::unique_ptr> descriptor_; +}; + +template +struct ConstCuSparseDescriptorDeleter { + void operator()(T* x) { + if (x != nullptr) { + TORCH_CUDASPARSE_CHECK(destructor(x)); + } + } +}; + +template +class ConstCuSparseDescriptor { + public: + T* descriptor() const { + return descriptor_.get(); + } + T* descriptor() { + return descriptor_.get(); + } + + protected: + std::unique_ptr> descriptor_; +}; + +#if defined(USE_ROCM) +using cusparseMatDescr = std::remove_pointer_t; +using cusparseDnMatDescr = std::remove_pointer_t; +using cusparseDnVecDescr = std::remove_pointer_t; +using cusparseSpMatDescr = std::remove_pointer_t; +using cusparseSpMatDescr = std::remove_pointer_t; +using cusparseSpGEMMDescr = std::remove_pointer_t; +#if AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() +using bsrsv2Info = std::remove_pointer_t; +using bsrsm2Info = std::remove_pointer_t; +#endif +#endif + +// NOTE: This is only needed for CUDA 11 and earlier, since CUDA 12 introduced +// API for const descriptors +cusparseStatus_t destroyConstDnMat(const cusparseDnMatDescr* dnMatDescr); + +class TORCH_CUDA_CPP_API CuSparseMatDescriptor + : public CuSparseDescriptor { + public: + CuSparseMatDescriptor() { + cusparseMatDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateMatDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } + + CuSparseMatDescriptor(bool upper, bool unit) { + cusparseFillMode_t fill_mode = + upper ? CUSPARSE_FILL_MODE_UPPER : CUSPARSE_FILL_MODE_LOWER; + cusparseDiagType_t diag_type = + unit ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT; + cusparseMatDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateMatDescr(&raw_descriptor)); + TORCH_CUDASPARSE_CHECK(cusparseSetMatFillMode(raw_descriptor, fill_mode)); + TORCH_CUDASPARSE_CHECK(cusparseSetMatDiagType(raw_descriptor, diag_type)); + descriptor_.reset(raw_descriptor); + } +}; + +#if AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() + +class TORCH_CUDA_CPP_API CuSparseBsrsv2Info + : public CuSparseDescriptor { + public: + CuSparseBsrsv2Info() { + bsrsv2Info_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateBsrsv2Info(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; + +class TORCH_CUDA_CPP_API CuSparseBsrsm2Info + : public CuSparseDescriptor { + public: + CuSparseBsrsm2Info() { + bsrsm2Info_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateBsrsm2Info(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; + +#endif // AT_USE_HIPSPARSE_TRIANGULAR_SOLVE + +cusparseIndexType_t getCuSparseIndexType(const c10::ScalarType& scalar_type); + + class TORCH_CUDA_CPP_API CuSparseDnMatDescriptor + : public ConstCuSparseDescriptor< + cusparseDnMatDescr, + &cusparseDestroyDnMat> { + public: + explicit CuSparseDnMatDescriptor( + const Tensor& input, + int64_t batch_offset = -1); + }; + + class TORCH_CUDA_CPP_API CuSparseConstDnMatDescriptor + : public ConstCuSparseDescriptor< + const cusparseDnMatDescr, + &destroyConstDnMat> { + public: + explicit CuSparseConstDnMatDescriptor( + const Tensor& input, + int64_t batch_offset = -1); + cusparseDnMatDescr* unsafe_mutable_descriptor() const { + return const_cast(descriptor()); + } + cusparseDnMatDescr* unsafe_mutable_descriptor() { + return const_cast(descriptor()); + } + }; + + class TORCH_CUDA_CPP_API CuSparseDnVecDescriptor + : public ConstCuSparseDescriptor< + cusparseDnVecDescr, + &cusparseDestroyDnVec> { + public: + explicit CuSparseDnVecDescriptor(const Tensor& input); + }; + + class TORCH_CUDA_CPP_API CuSparseSpMatDescriptor + : public ConstCuSparseDescriptor< + cusparseSpMatDescr, + &cusparseDestroySpMat> {}; + +class TORCH_CUDA_CPP_API CuSparseSpMatCsrDescriptor + : public CuSparseSpMatDescriptor { + public: + explicit CuSparseSpMatCsrDescriptor(const Tensor& input, int64_t batch_offset = -1); + + std::tuple get_size() { + int64_t rows = 0, cols = 0, nnz = 0; + TORCH_CUDASPARSE_CHECK(cusparseSpMatGetSize( + this->descriptor(), + &rows, + &cols, + &nnz)); + return std::make_tuple(rows, cols, nnz); + } + + void set_tensor(const Tensor& input) { + auto crow_indices = input.crow_indices(); + auto col_indices = input.col_indices(); + auto values = input.values(); + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(crow_indices.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(col_indices.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(values.is_contiguous()); + TORCH_CUDASPARSE_CHECK(cusparseCsrSetPointers( + this->descriptor(), + crow_indices.data_ptr(), + col_indices.data_ptr(), + values.data_ptr())); + } + +#if AT_USE_CUSPARSE_GENERIC_SPSV() + void set_mat_fill_mode(bool upper) { + cusparseFillMode_t fill_mode = + upper ? CUSPARSE_FILL_MODE_UPPER : CUSPARSE_FILL_MODE_LOWER; + TORCH_CUDASPARSE_CHECK(cusparseSpMatSetAttribute( + this->descriptor(), + CUSPARSE_SPMAT_FILL_MODE, + &fill_mode, + sizeof(fill_mode))); + } + + void set_mat_diag_type(bool unit) { + cusparseDiagType_t diag_type = + unit ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT; + TORCH_CUDASPARSE_CHECK(cusparseSpMatSetAttribute( + this->descriptor(), + CUSPARSE_SPMAT_DIAG_TYPE, + &diag_type, + sizeof(diag_type))); + } +#endif +}; + +#if AT_USE_CUSPARSE_GENERIC_SPSV() +class TORCH_CUDA_CPP_API CuSparseSpSVDescriptor + : public CuSparseDescriptor { + public: + CuSparseSpSVDescriptor() { + cusparseSpSVDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseSpSV_createDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; +#endif + +#if AT_USE_CUSPARSE_GENERIC_SPSM() +class TORCH_CUDA_CPP_API CuSparseSpSMDescriptor + : public CuSparseDescriptor { + public: + CuSparseSpSMDescriptor() { + cusparseSpSMDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseSpSM_createDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; +#endif + +class TORCH_CUDA_CPP_API CuSparseSpGEMMDescriptor + : public CuSparseDescriptor { + public: + CuSparseSpGEMMDescriptor() { + cusparseSpGEMMDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseSpGEMM_createDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; + +} // namespace at::cuda::sparse + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDATensorMethods.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDATensorMethods.cuh new file mode 100644 index 0000000000000000000000000000000000000000..41d847081a8b1459d00c2a4b0ef42bfa8de25b4c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDATensorMethods.cuh @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace at { +template <> +inline __half* Tensor::data() const { + return reinterpret_cast<__half*>(data()); +} +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAUtils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..47a464883e6aa9d8486c87b1e4219cf870b2cbdd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAUtils.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace at::cuda { + +// Check if every tensor in a list of tensors matches the current +// device. +inline bool check_device(ArrayRef ts) { + if (ts.empty()) { + return true; + } + Device curDevice = Device(kCUDA, current_device()); + for (const Tensor& t : ts) { + if (t.device() != curDevice) return false; + } + return true; +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CachingHostAllocator.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CachingHostAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..770d4730bc5c790ea853985060d5f2d9bd05bc80 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/CachingHostAllocator.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace at::cuda { + +// +// A caching allocator for CUDA host allocations (pinned memory). +// +// This provides a drop-in replacement for THCudaHostAllocator, which reuses +// freed pinned (page-locked) memory allocations. This avoids device +// synchronizations due to cudaFreeHost calls. +// +// To ensure correct behavior, THCCachingHostAllocator_recordEvent must be +// called anytime a pointer from this allocator is used in a cudaMemcpyAsync +// call between host and device, and passed the corresponding context from the +// allocation. This is currently invoked by at::native::copy_kernel_cuda. +// +C10_DEPRECATED_MESSAGE( + "at::cuda::getCachingHostAllocator() is deprecated. Please use at::getHostAllocator(at::kCUDA) instead.") +inline TORCH_CUDA_CPP_API at::HostAllocator* getCachingHostAllocator() { + return at::getHostAllocator(at::kCUDA); +} + +// Records an event in the specified stream. The allocation corresponding to the +// input `ptr`/`ctx` will not be reused until the event has occurred. +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_recordEvent(...) is deprecated. Please use at::getHostAllocator(at::kCUDA)->record_event(...) instead.") +inline TORCH_CUDA_CPP_API bool CachingHostAllocator_recordEvent( + void* ptr, + void* ctx, + c10::cuda::CUDAStream stream) { + return getHostAllocator(at::kCUDA)->record_event(ptr, ctx, stream.unwrap()); +} + +// Releases cached pinned memory allocations via cudaHostFree +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_emptyCache() is deprecated. Please use at::getHostAllocator(at::kCUDA)->empty_cache() instead.") +inline TORCH_CUDA_CPP_API void CachingHostAllocator_emptyCache() { + getHostAllocator(at::kCUDA)->empty_cache(); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::HostAlloc(...) is deprecated. Please use at::getHostAllocator(at::kCUDA)->allocate(...) instead.") +inline TORCH_CUDA_CPP_API at::DataPtr HostAlloc(size_t size) { + return getHostAllocator(at::kCUDA)->allocate(size); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_getStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->get_stats() instead.") +inline TORCH_CUDA_CPP_API at::HostStats CachingHostAllocator_getStats() { + return getHostAllocator(at::kCUDA)->get_stats(); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_resetAccumulatedStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->reset_accumulated_stats() instead.") +inline TORCH_CUDA_CPP_API void CachingHostAllocator_resetAccumulatedStats() { + getHostAllocator(at::kCUDA)->reset_accumulated_stats(); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_resetPeakStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->reset_peak_stats() instead.") +inline TORCH_CUDA_CPP_API void CachingHostAllocator_resetPeakStats() { + getHostAllocator(at::kCUDA)->reset_peak_stats(); +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/DeviceUtils.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/DeviceUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..70b745c77b783f9fa2ab6e977b9f9b3f47464158 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/DeviceUtils.cuh @@ -0,0 +1,126 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +__device__ __forceinline__ unsigned int ACTIVE_MASK() +{ +#if !defined(USE_ROCM) + return __activemask(); +#else +// will be ignored anyway + return 0xffffffff; +#endif +} + +__device__ __forceinline__ void WARP_SYNC(unsigned mask = 0xffffffff) { +#if !defined(USE_ROCM) + return __syncwarp(mask); +#endif +} + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int WARP_BALLOT(int predicate) +{ +return __ballot(predicate); +} +#else +__device__ __forceinline__ unsigned int WARP_BALLOT(int predicate, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __ballot_sync(mask, predicate); +#else + return __ballot(predicate); +#endif +} +#endif + +template +__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_xor_sync(mask, value, laneMask, width); +#else + return __shfl_xor(value, laneMask, width); +#endif +} + +template +__device__ __forceinline__ T WARP_SHFL(T value, int srcLane, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_sync(mask, value, srcLane, width); +#else + return __shfl(value, srcLane, width); +#endif +} + +template +__device__ __forceinline__ T WARP_SHFL_UP(T value, unsigned int delta, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_up_sync(mask, value, delta, width); +#else + return __shfl_up(value, delta, width); +#endif +} + +template +__device__ __forceinline__ T WARP_SHFL_DOWN(T value, unsigned int delta, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_down_sync(mask, value, delta, width); +#else + return __shfl_down(value, delta, width); +#endif +} + +#if defined(USE_ROCM) +template<> +__device__ __forceinline__ int64_t WARP_SHFL_DOWN(int64_t value, unsigned int delta, int width , unsigned int mask) +{ + //(HIP doesn't support int64_t). Trick from https://devblogs.nvidia.com/faster-parallel-reductions-kepler/ + int2 a = *reinterpret_cast(&value); + a.x = __shfl_down(a.x, delta); + a.y = __shfl_down(a.y, delta); + return *reinterpret_cast(&a); +} +#endif + +template<> +__device__ __forceinline__ c10::Half WARP_SHFL_DOWN(c10::Half value, unsigned int delta, int width, unsigned int mask) +{ + return c10::Half(WARP_SHFL_DOWN(value.x, delta, width, mask), c10::Half::from_bits_t{}); +} + +template +__device__ __forceinline__ c10::complex WARP_SHFL_DOWN(c10::complex value, unsigned int delta, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return c10::complex( + __shfl_down_sync(mask, value.real_, delta, width), + __shfl_down_sync(mask, value.imag_, delta, width)); +#else + return c10::complex( + __shfl_down(value.real_, delta, width), + __shfl_down(value.imag_, delta, width)); +#endif +} + +/** + * For CC 3.5+, perform a load using __ldg + */ +template +__device__ __forceinline__ T doLdg(const T* p) { +#if __CUDA_ARCH__ >= 350 && !defined(USE_ROCM) + return __ldg(p); +#else + return *p; +#endif +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/EmptyTensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/EmptyTensor.h new file mode 100644 index 0000000000000000000000000000000000000000..c7d3e0fce43c029615ddd8a63c7269ccb014989b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/EmptyTensor.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace at::detail { + +TORCH_CUDA_CPP_API TensorBase empty_cuda( + IntArrayRef size, + ScalarType dtype, + std::optional device_opt, + std::optional memory_format_opt); + +TORCH_CUDA_CPP_API TensorBase empty_cuda( + IntArrayRef size, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt, + std::optional memory_format_opt); + +TORCH_CUDA_CPP_API TensorBase empty_cuda( + IntArrayRef size, + const TensorOptions &options); + +TORCH_CUDA_CPP_API TensorBase empty_strided_cuda( + IntArrayRef size, + IntArrayRef stride, + ScalarType dtype, + std::optional device_opt); + +TORCH_CUDA_CPP_API TensorBase empty_strided_cuda( + IntArrayRef size, + IntArrayRef stride, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt); + +TORCH_CUDA_CPP_API TensorBase empty_strided_cuda( + IntArrayRef size, + IntArrayRef stride, + const TensorOptions &options); + + +} // namespace at::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Exceptions.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Exceptions.h new file mode 100644 index 0000000000000000000000000000000000000000..f70102e9320176c04ffadf127cc46fcb55246d03 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Exceptions.h @@ -0,0 +1,235 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#if !defined(USE_ROCM) +#include +#else +#include +#endif + +#if defined(USE_CUDSS) +#include +#endif + +#include +#include +#include + + +namespace c10 { + +class CuDNNError : public c10::Error { + using Error::Error; +}; + +} // namespace c10 + +#define AT_CUDNN_FRONTEND_CHECK(EXPR, ...) \ + do { \ + auto error_object = EXPR; \ + if (!error_object.is_good()) { \ + TORCH_CHECK_WITH(CuDNNError, false, \ + "cuDNN Frontend error: ", error_object.get_message()); \ + } \ + } while (0) \ + +#define AT_CUDNN_CHECK_WITH_SHAPES(EXPR, ...) AT_CUDNN_CHECK(EXPR, "\n", ##__VA_ARGS__) + +// See Note [CHECK macro] +#define AT_CUDNN_CHECK(EXPR, ...) \ + do { \ + cudnnStatus_t status = EXPR; \ + if (status != CUDNN_STATUS_SUCCESS) { \ + if (status == CUDNN_STATUS_NOT_SUPPORTED) { \ + TORCH_CHECK_WITH(CuDNNError, false, \ + "cuDNN error: ", \ + cudnnGetErrorString(status), \ + ". This error may appear if you passed in a non-contiguous input.", ##__VA_ARGS__); \ + } else { \ + TORCH_CHECK_WITH(CuDNNError, false, \ + "cuDNN error: ", cudnnGetErrorString(status), ##__VA_ARGS__); \ + } \ + } \ + } while (0) + +namespace at::cuda::blas { +C10_EXPORT const char* _cublasGetErrorEnum(cublasStatus_t error); +} // namespace at::cuda::blas + +#define TORCH_CUDABLAS_CHECK(EXPR) \ + do { \ + cublasStatus_t __err = EXPR; \ + TORCH_CHECK(__err == CUBLAS_STATUS_SUCCESS, \ + "CUDA error: ", \ + at::cuda::blas::_cublasGetErrorEnum(__err), \ + " when calling `" #EXPR "`"); \ + } while (0) + +const char *cusparseGetErrorString(cusparseStatus_t status); + +#define TORCH_CUDASPARSE_CHECK(EXPR) \ + do { \ + cusparseStatus_t __err = EXPR; \ + TORCH_CHECK(__err == CUSPARSE_STATUS_SUCCESS, \ + "CUDA error: ", \ + cusparseGetErrorString(__err), \ + " when calling `" #EXPR "`"); \ + } while (0) + +#if defined(USE_CUDSS) +namespace at::cuda::cudss { +C10_EXPORT const char* cudssGetErrorMessage(cudssStatus_t error); +} // namespace at::cuda::solver + +#define TORCH_CUDSS_CHECK(EXPR) \ + do { \ + cudssStatus_t __err = EXPR; \ + if (__err == CUDSS_STATUS_EXECUTION_FAILED) { \ + TORCH_CHECK_LINALG( \ + false, \ + "cudss error: ", \ + at::cuda::cudss::cudssGetErrorMessage(__err), \ + ", when calling `" #EXPR "`", \ + ". This error may appear if the input matrix contains NaN. ");\ + } else { \ + TORCH_CHECK( \ + __err == CUDSS_STATUS_SUCCESS, \ + "cudss error: ", \ + at::cuda::cudss::cudssGetErrorMessage(__err), \ + ", when calling `" #EXPR "`. "); \ + } \ + } while (0) +#else +#define TORCH_CUDSS_CHECK(EXPR) EXPR +#endif + +namespace at::cuda::solver { +#if !defined(USE_ROCM) + +C10_EXPORT const char* cusolverGetErrorMessage(cusolverStatus_t status); + +constexpr const char* _cusolver_backend_suggestion = \ + "If you keep seeing this error, you may use " \ + "`torch.backends.cuda.preferred_linalg_library()` to try " \ + "linear algebra operators with other supported backends. " \ + "See https://pytorch.org/docs/stable/backends.html#torch.backends.cuda.preferred_linalg_library"; + +// When cuda >= 11.5, cusolver normally finishes execution and sets info array indicating convergence issue. +#define TORCH_CUSOLVER_CHECK(EXPR) \ + do { \ + cusolverStatus_t __err = EXPR; \ + if (__err == CUSOLVER_STATUS_INVALID_VALUE) { \ + TORCH_CHECK_LINALG( \ + false, \ + "cusolver error: ", \ + at::cuda::solver::cusolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`", \ + ". This error may appear if the input matrix contains NaN. ", \ + at::cuda::solver::_cusolver_backend_suggestion); \ + } else { \ + TORCH_CHECK( \ + __err == CUSOLVER_STATUS_SUCCESS, \ + "cusolver error: ", \ + at::cuda::solver::cusolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`. ", \ + at::cuda::solver::_cusolver_backend_suggestion); \ + } \ + } while (0) + +#else // defined(USE_ROCM) + +C10_EXPORT const char* hipsolverGetErrorMessage(hipsolverStatus_t status); + +constexpr const char* _hipsolver_backend_suggestion = \ + "If you keep seeing this error, you may use " \ + "`torch.backends.cuda.preferred_linalg_library()` to try " \ + "linear algebra operators with other supported backends. " \ + "See https://pytorch.org/docs/stable/backends.html#torch.backends.cuda.preferred_linalg_library"; + +#define TORCH_CUSOLVER_CHECK(EXPR) \ + do { \ + hipsolverStatus_t __err = EXPR; \ + if (__err == HIPSOLVER_STATUS_INVALID_VALUE) { \ + TORCH_CHECK_LINALG( \ + false, \ + "hipsolver error: ", \ + at::cuda::solver::hipsolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`", \ + ". This error may appear if the input matrix contains NaN. ", \ + at::cuda::solver::_hipsolver_backend_suggestion); \ + } else { \ + TORCH_CHECK( \ + __err == HIPSOLVER_STATUS_SUCCESS, \ + "hipsolver error: ", \ + at::cuda::solver::hipsolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`. ", \ + at::cuda::solver::_hipsolver_backend_suggestion); \ + } \ + } while (0) +#endif +} // namespace at::cuda::solver + +#define AT_CUDA_CHECK(EXPR) C10_CUDA_CHECK(EXPR) + +// For CUDA Driver API +// +// This is here instead of in c10 because NVRTC is loaded dynamically via a stub +// in ATen, and we need to use its nvrtcGetErrorString. +// See NOTE [ USE OF NVRTC AND DRIVER API ]. +#if !defined(USE_ROCM) + +#define AT_CUDA_DRIVER_CHECK(EXPR) \ + do { \ + CUresult __err = EXPR; \ + if (__err != CUDA_SUCCESS) { \ + const char* err_str; \ + [[maybe_unused]] CUresult get_error_str_err = \ + at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \ + if (get_error_str_err != CUDA_SUCCESS) { \ + TORCH_CHECK(false, "CUDA driver error: unknown error"); \ + } else { \ + TORCH_CHECK(false, "CUDA driver error: ", err_str); \ + } \ + } \ + } while (0) + +#else + +#define AT_CUDA_DRIVER_CHECK(EXPR) \ + do { \ + CUresult __err = EXPR; \ + if (__err != CUDA_SUCCESS) { \ + TORCH_CHECK(false, "CUDA driver error: ", static_cast(__err)); \ + } \ + } while (0) + +#endif + +// For CUDA NVRTC +// +// Note: As of CUDA 10, nvrtc error code 7, NVRTC_ERROR_BUILTIN_OPERATION_FAILURE, +// incorrectly produces the error string "NVRTC unknown error." +// The following maps it correctly. +// +// This is here instead of in c10 because NVRTC is loaded dynamically via a stub +// in ATen, and we need to use its nvrtcGetErrorString. +// See NOTE [ USE OF NVRTC AND DRIVER API ]. +#define AT_CUDA_NVRTC_CHECK(EXPR) \ + do { \ + nvrtcResult __err = EXPR; \ + if (__err != NVRTC_SUCCESS) { \ + if (static_cast(__err) != 7) { \ + TORCH_CHECK(false, "CUDA NVRTC error: ", at::globalContext().getNVRTC().nvrtcGetErrorString(__err)); \ + } else { \ + TORCH_CHECK(false, "CUDA NVRTC error: NVRTC_ERROR_BUILTIN_OPERATION_FAILURE"); \ + } \ + } \ + } while (0) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/MemPool.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/MemPool.h new file mode 100644 index 0000000000000000000000000000000000000000..d7af173fa75a5cbdb8f565569be40f4035a02e5f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/MemPool.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::cuda { + +// Keep BC only +using c10::CaptureId_t; +using c10::MempoolId_t; + +// MemPool represents a pool of memory in a caching allocator. Currently, +// it's just the ID of the pool object maintained in the CUDACachingAllocator. +// +// An allocator pointer can be passed to the MemPool to define how the +// allocations should be done in the pool. For example: using a different +// system allocator such as ncclMemAlloc. +struct TORCH_CUDA_CPP_API MemPool { + MemPool( + c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator = nullptr, + bool is_user_created = true, + bool use_on_oom = false, + bool no_split = false); + MemPool(const MemPool&) = delete; + MemPool(MemPool&&) = default; + MemPool& operator=(const MemPool&) = delete; + MemPool& operator=(MemPool&&) = default; + ~MemPool(); + + MempoolId_t id(); + c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator(); + int use_count(); + c10::DeviceIndex device(); + static MempoolId_t graph_pool_handle(bool is_user_created = true); + + private: + static std::atomic uid_; + static std::atomic uuid_; + c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator_; + bool is_user_created_; + MempoolId_t id_; + c10::DeviceIndex device_; +}; + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/NumericLimits.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/NumericLimits.cuh new file mode 100644 index 0000000000000000000000000000000000000000..9fb5ed34ead66a74a14993429b0d63585d60bf35 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/NumericLimits.cuh @@ -0,0 +1,126 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +// NumericLimits.cuh is a holder for numeric limits definitions of commonly used +// types. This header is very specific to ROCm HIP and may be removed in the future. +// This header is derived from the legacy THCNumerics.cuh. + +// The lower_bound and upper_bound constants are same as lowest and max for +// integral types, but are -inf and +inf for floating point types. They are +// useful in implementing min, max, etc. + +namespace at { + +template +struct numeric_limits { +}; + +// WARNING: the following at::numeric_limits definitions are there only to support +// HIP compilation for the moment. Use std::numeric_limits if you are not +// compiling for ROCm. +// from @colesbury: "The functions on numeric_limits aren't marked with +// __device__ which is why they don't work with ROCm. CUDA allows them +// because they're constexpr." + +namespace { + // ROCm doesn't like INFINITY too. + constexpr double inf = INFINITY; +} + +template <> +struct numeric_limits { + static inline __host__ __device__ bool lowest() { return false; } + static inline __host__ __device__ bool max() { return true; } + static inline __host__ __device__ bool lower_bound() { return false; } + static inline __host__ __device__ bool upper_bound() { return true; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ uint8_t lowest() { return 0; } + static inline __host__ __device__ uint8_t max() { return UINT8_MAX; } + static inline __host__ __device__ uint8_t lower_bound() { return 0; } + static inline __host__ __device__ uint8_t upper_bound() { return UINT8_MAX; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ int8_t lowest() { return INT8_MIN; } + static inline __host__ __device__ int8_t max() { return INT8_MAX; } + static inline __host__ __device__ int8_t lower_bound() { return INT8_MIN; } + static inline __host__ __device__ int8_t upper_bound() { return INT8_MAX; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ int16_t lowest() { return INT16_MIN; } + static inline __host__ __device__ int16_t max() { return INT16_MAX; } + static inline __host__ __device__ int16_t lower_bound() { return INT16_MIN; } + static inline __host__ __device__ int16_t upper_bound() { return INT16_MAX; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ int32_t lowest() { return INT32_MIN; } + static inline __host__ __device__ int32_t max() { return INT32_MAX; } + static inline __host__ __device__ int32_t lower_bound() { return INT32_MIN; } + static inline __host__ __device__ int32_t upper_bound() { return INT32_MAX; } +}; + +template <> +struct numeric_limits { +#ifdef _MSC_VER + static inline __host__ __device__ int64_t lowest() { return _I64_MIN; } + static inline __host__ __device__ int64_t max() { return _I64_MAX; } + static inline __host__ __device__ int64_t lower_bound() { return _I64_MIN; } + static inline __host__ __device__ int64_t upper_bound() { return _I64_MAX; } +#else + static inline __host__ __device__ int64_t lowest() { return INT64_MIN; } + static inline __host__ __device__ int64_t max() { return INT64_MAX; } + static inline __host__ __device__ int64_t lower_bound() { return INT64_MIN; } + static inline __host__ __device__ int64_t upper_bound() { return INT64_MAX; } +#endif +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ at::Half lowest() { return at::Half(0xFBFF, at::Half::from_bits()); } + static inline __host__ __device__ at::Half max() { return at::Half(0x7BFF, at::Half::from_bits()); } + static inline __host__ __device__ at::Half lower_bound() { return at::Half(0xFC00, at::Half::from_bits()); } + static inline __host__ __device__ at::Half upper_bound() { return at::Half(0x7C00, at::Half::from_bits()); } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ at::BFloat16 lowest() { return at::BFloat16(0xFF7F, at::BFloat16::from_bits()); } + static inline __host__ __device__ at::BFloat16 max() { return at::BFloat16(0x7F7F, at::BFloat16::from_bits()); } + static inline __host__ __device__ at::BFloat16 lower_bound() { return at::BFloat16(0xFF80, at::BFloat16::from_bits()); } + static inline __host__ __device__ at::BFloat16 upper_bound() { return at::BFloat16(0x7F80, at::BFloat16::from_bits()); } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ float lowest() { return -FLT_MAX; } + static inline __host__ __device__ float max() { return FLT_MAX; } + static inline __host__ __device__ float lower_bound() { return -static_cast(inf); } + static inline __host__ __device__ float upper_bound() { return static_cast(inf); } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ double lowest() { return -DBL_MAX; } + static inline __host__ __device__ double max() { return DBL_MAX; } + static inline __host__ __device__ double lower_bound() { return -inf; } + static inline __host__ __device__ double upper_bound() { return inf; } +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PeerToPeerAccess.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PeerToPeerAccess.h new file mode 100644 index 0000000000000000000000000000000000000000..4d2af0e230f8c9c0323a40822c503004611497a4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PeerToPeerAccess.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include + +namespace at::cuda { +namespace detail { +void init_p2p_access_cache(int64_t num_devices); +} + +TORCH_CUDA_CPP_API bool get_p2p_access(c10::DeviceIndex source_dev, c10::DeviceIndex dest_dev); +TORCH_CUDA_CPP_API bool get_fabric_access(c10::DeviceIndex device); + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxCudaState.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxCudaState.h new file mode 100644 index 0000000000000000000000000000000000000000..df0077e7b20d58738646b22b058a6d9474e8686c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxCudaState.h @@ -0,0 +1,10 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxUtils.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..4a7824426e76d22826d8489d8f75557a7a65876d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxUtils.cuh @@ -0,0 +1,9 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PinnedMemoryAllocator.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PinnedMemoryAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..5fac9a3a6b3f472650eee44b5f18dd5f1795a5e0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/PinnedMemoryAllocator.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace at::cuda { + +inline TORCH_CUDA_CPP_API at::HostAllocator* getPinnedMemoryAllocator() { + return at::getHostAllocator(at::kCUDA); +} +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ScanUtils.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ScanUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..233c307bc5a8668b1728441c645bcf550159140c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ScanUtils.cuh @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +// Collection of in-kernel scan / prefix sum utilities + +namespace at::cuda { + +// Inclusive prefix sum for binary vars using intra-warp voting + +// shared memory +template +__device__ void inclusiveBinaryPrefixScan(T* smem, bool in, T* out, BinaryFunction binop) { + // Within-warp, we use warp voting. +#if defined (USE_ROCM) + unsigned long long int vote = WARP_BALLOT(in); + T index = __popcll(getLaneMaskLe() & vote); + T carry = __popcll(vote); +#else + T vote = WARP_BALLOT(in); + T index = __popc(getLaneMaskLe() & vote); + T carry = __popc(vote); +#endif + + int warp = threadIdx.x / C10_WARP_SIZE; + + // Per each warp, write out a value + if (getLaneId() == 0) { + smem[warp] = carry; + } + + __syncthreads(); + + // Sum across warps in one thread. This appears to be faster than a + // warp shuffle scan for CC 3.0+ + if (threadIdx.x == 0) { + int current = 0; + for (int i = 0; i < blockDim.x / C10_WARP_SIZE; ++i) { + T v = smem[i]; + smem[i] = binop(smem[i], current); + current = binop(current, v); + } + } + + __syncthreads(); + + // load the carry from the preceding warp + if (warp >= 1) { + index = binop(index, smem[warp - 1]); + } + + *out = index; + + if (KillWARDependency) { + __syncthreads(); + } +} + +// Exclusive prefix sum for binary vars using intra-warp voting + +// shared memory +template +__device__ void exclusiveBinaryPrefixScan(T* smem, bool in, T* out, T* carry, BinaryFunction binop) { + inclusiveBinaryPrefixScan(smem, in, out, binop); + + // Inclusive to exclusive + *out -= (T) in; + + // The outgoing carry for all threads is the last warp's sum + *carry = smem[at::ceil_div(blockDim.x, C10_WARP_SIZE) - 1]; + + if (KillWARDependency) { + __syncthreads(); + } +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Sleep.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Sleep.h new file mode 100644 index 0000000000000000000000000000000000000000..04564bd165e0732756dbb79394ee6db08a40ed5f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/Sleep.h @@ -0,0 +1,23 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace at::cuda { + +// enqueues a kernel that spins for the specified number of cycles +TORCH_CUDA_CU_API void sleep(int64_t cycles); + +// enqueues a kernel that spins until a flag is cleared by a +// corresponding call to clear_flag() +TORCH_CUDA_CU_API void busy_wait_for_flag(); +TORCH_CUDA_CU_API void clear_flag(); + +// flushes instruction cache for ROCm; no-op for CUDA +TORCH_CUDA_CU_API void flush_icache(); + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ThrustAllocator.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ThrustAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..083c2dd84d49ec0882b00753378a046d26be43fc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/ThrustAllocator.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::cuda { + +/// Allocator for Thrust to re-route its internal device allocations +/// to the THC allocator +class ThrustAllocator { +public: + typedef char value_type; + + char* allocate(std::ptrdiff_t size) { + return static_cast(c10::cuda::CUDACachingAllocator::raw_alloc(size)); + } + + void deallocate(char* p, size_t size) { + c10::cuda::CUDACachingAllocator::raw_delete(p); + } +}; + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub-RadixSortPairs.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub-RadixSortPairs.cuh new file mode 100644 index 0000000000000000000000000000000000000000..0f231c3fbda755cc7b2d2dc4ee4e758a42e1a505 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub-RadixSortPairs.cuh @@ -0,0 +1,79 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#define TORCH_ASSERT_NO_OPERATORS +#include +#include + +namespace at::cuda::cub::detail { + +template +void radix_sort_pairs_impl( + const key_t* keys_in, + key_t* keys_out, + const OpaqueType* values_in, + OpaqueType* values_out, + int64_t n, + bool descending, + int64_t begin_bit, + int64_t end_bit) { + TORCH_CHECK( + n <= std::numeric_limits::max(), + "cub sort does not support sorting more than INT_MAX elements"); + using key_t_ = typename detail::cuda_type::type; + + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr keys_out_owner; + + if (keys_out == nullptr) { + keys_out_owner = allocator->allocate(n * sizeof(key_t)); + keys_out = reinterpret_cast(keys_out_owner.get()); + } + + const key_t_* keys_in_ = reinterpret_cast(keys_in); + key_t_* keys_out_ = reinterpret_cast(keys_out); + + if (descending) { + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceRadixSort::SortPairsDescending, + keys_in_, + keys_out_, + values_in, + values_out, + n, + begin_bit, + end_bit, + c10::cuda::getCurrentCUDAStream()); + } else { + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceRadixSort::SortPairs, + keys_in_, + keys_out_, + values_in, + values_out, + n, + begin_bit, + end_bit, + c10::cuda::getCurrentCUDAStream()); + } +} + +#define AT_INSTANTIATE_SORT_PAIRS(key_t, value_size) \ + template void radix_sort_pairs_impl( \ + const key_t* keys_in, \ + key_t* keys_out, \ + const OpaqueType* values_in, \ + OpaqueType* values_out, \ + int64_t n, \ + bool descending, \ + int64_t begin_bit, \ + int64_t end_bit); + +#define AT_INSTANTIATE_SORT_PAIRS_8(scalar_t, ScalarType) \ + AT_INSTANTIATE_SORT_PAIRS(scalar_t, 8) + +} // namespace at::cuda::cub::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.cuh new file mode 100644 index 0000000000000000000000000000000000000000..a6666f74796d226ce13b8162c249b3458dee1c93 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.cuh @@ -0,0 +1,576 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#include +#include +#include +#include + +#ifndef USE_ROCM +#include +#endif + +#include +#include + +#if USE_GLOBAL_CUB_WRAPPED_NAMESPACE() + +#include + +#else + +// include cub in a safe manner, see: +// https://github.com/pytorch/pytorch/pull/55292 +#undef CUB_NS_POSTFIX //undef to avoid redefinition warnings +#undef CUB_NS_PREFIX +#undef CUB_NS_QUALIFIER +#define CUB_NS_PREFIX namespace at_cuda_detail { +#define CUB_NS_POSTFIX } +#define CUB_NS_QUALIFIER ::at_cuda_detail::cub +#include +#undef CUB_NS_POSTFIX +#undef CUB_NS_PREFIX +#undef CUB_NS_QUALIFIER + +#endif + +#include +#include +#include + +// handle the temporary storage and 'twice' calls for cub API +#define CUB_WRAPPER(func, ...) do { \ + size_t temp_storage_bytes = 0; \ + AT_CUDA_CHECK(func(nullptr, temp_storage_bytes, __VA_ARGS__)); \ + auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get(); \ + auto temp_storage = caching_allocator.allocate(temp_storage_bytes); \ + AT_CUDA_CHECK(func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__));\ +} while (false) + +#ifdef USE_ROCM +#define NO_ROCM(x) +#define ROCM_HIPCUB(x) ::hipcub +#else +#define NO_ROCM(x) x +#define ROCM_HIPCUB(x) x +#endif + +#if CUB_V3_PLUS() +#include +#include +#include +#define ATEN_CUB_TRANSFORM_ITERATOR(ValueType, ...) ::thrust::transform_iterator<__VA_ARGS__> +#define ATEN_CUB_COUNTING_ITERATOR(...) ::thrust::counting_iterator<__VA_ARGS__> +#define ATEN_CUB_CONSTANT_ITERATOR(...) ::thrust::constant_iterator<__VA_ARGS__> +#define ATEN_CUB_MAXIMUM() ::cuda::maximum<>() +#else +#define ATEN_CUB_TRANSFORM_ITERATOR(...) NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::TransformInputIterator<__VA_ARGS__> +#define ATEN_CUB_COUNTING_ITERATOR(...) NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::CountingInputIterator<__VA_ARGS__> +#define ATEN_CUB_CONSTANT_ITERATOR(...) NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::ConstantInputIterator<__VA_ARGS__> +#define ATEN_CUB_MAXIMUM() NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::Max() +#endif + +#if defined(USE_ROCM) + +// backport https://github.com/NVIDIA/cub/pull/306 for c10::BFloat16 + +template <> +struct ROCM_HIPCUB(cub)::FpLimits +{ + static __host__ __device__ __forceinline__ c10::BFloat16 Max() { + unsigned short max_word = 0x7F7F; + return reinterpret_cast(max_word); + } + + static __host__ __device__ __forceinline__ c10::BFloat16 Lowest() { + unsigned short lowest_word = 0xFF7F; + return reinterpret_cast(lowest_word); + } +}; + +template <> +struct ROCM_HIPCUB(cub)::NumericTraits: + ROCM_HIPCUB(cub)::BaseTraits {}; + +#endif + +#if !defined(USE_ROCM) +namespace at::native { +namespace cub = ::at_cuda_detail::cub; +} // namespace at::native +#endif + +namespace at::cuda::cub { + +namespace detail { + +template +struct cuda_type { + using type = T; +}; +template<> +struct cuda_type { + using type = __half; +}; + +#if !defined(USE_ROCM) + +template<> +struct cuda_type { + using type = __nv_bfloat16; +}; + +#elif defined(USE_ROCM) + +template<> +struct cuda_type { + using type = hip_bfloat16; +}; + +#endif + +} // namespace detail + +template +inline void segmented_sort_pairs( + const key_t *keys_in, key_t *keys_out, + const value_t *values_in, value_t *values_out, + int64_t num_elements, int64_t num_segments, + OffsetIteratorT begin_offsets, OffsetIteratorT end_offsets, + bool descending=false, int64_t begin_bit=0, int64_t end_bit=sizeof(key_t)*8 +) { + TORCH_CHECK(num_elements <= std::numeric_limits::max(), + "cub sort does not support sorting more than INT_MAX elements"); + TORCH_CHECK(num_segments <= std::numeric_limits::max(), + "cub sort does not support sorting more than INT_MAX elements"); + using key_t_ = typename detail::cuda_type::type; + + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr keys_out_owner; + + if (keys_out == nullptr) { + keys_out_owner = allocator->allocate(num_elements * sizeof(key_t)); + keys_out = reinterpret_cast(keys_out_owner.get()); + } + + const key_t_ *keys_in_ = reinterpret_cast(keys_in); + key_t_ *keys_out_ = reinterpret_cast(keys_out); + + if (descending) { + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSegmentedRadixSort::SortPairsDescending, + keys_in_, keys_out_, values_in, values_out, + num_elements, num_segments, begin_offsets, end_offsets, + begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); + } else { + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSegmentedRadixSort::SortPairs, + keys_in_, keys_out_, values_in, values_out, + num_elements, num_segments, begin_offsets, end_offsets, + begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); + } +} + +template +inline void unique_by_key( + KeysInputIteratorT keys_in, ValuesInputIteratorT values_in, + ValuesOutputIteratorT values_out, + NumSelectedIteratorT num_selected, int64_t num_input_items) +{ + // TODO: use thrust::discard_iterator to handle null keys_out when https://github.com/NVIDIA/cub/issues/406 is fixed. + using KeyT = typename std::iterator_traits::value_type; + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr keys_out_owner; + keys_out_owner = allocator->allocate(num_input_items * sizeof(KeyT)); + auto keys_out_ = static_cast(keys_out_owner.get()); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSelect::UniqueByKey, + keys_in, values_in, keys_out_, values_out, num_selected, num_input_items, c10::cuda::getCurrentCUDAStream()); +} + +namespace impl { + +template +C10_LAUNCH_BOUNDS_1(1) +__global__ void transform_vals(InputIteratorT1 a, InputIteratorT2 b, OutputIteratorT out, ScanOpT scan_op){ + // NOTE: out here not the final scan output, but an intermediate of the accumulation type. + using acc_t = typename std::iterator_traits::value_type; + *out = scan_op(static_cast(*a), static_cast(*b)); +} + +// even though cub is supposed to support tensors with int_max elements, in reality it doesn't, +// so split at int_max/2 +constexpr int max_cub_size = std::numeric_limits::max() / 2 + 1; // 2**30 +} + +// non synchronizing cub call +// even though cub is supposed to support tensors with int_max elements, in reality it doesn't, +// so split at int_max/2 +template +inline void inclusive_scan(InputIteratorT input, OutputIteratorT output, ScanOpT scan_op, int64_t num_items) { +#if defined(USE_ROCM) + //For ROCm, use hipCUB chained iterators + CUB_WRAPPER(NO_ROCM(detail)::hipcub::DeviceScan::InclusiveScan, + input, + output, + scan_op, + num_items, + at::cuda::getCurrentCUDAStream()); + C10_HIP_KERNEL_LAUNCH_CHECK(); +#else + // non synchronizing cub call + // even though cub is supposed to support tensors with int_max elements, in reality it doesn't, + // so split at int_max/2 + int size_cub = std::min(num_items, max_cub_size); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::InclusiveScan, + input, + output, + scan_op, + size_cub, + at::cuda::getCurrentCUDAStream()); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + using input_t = typename std::iterator_traits::value_type; + for (int64_t i = max_cub_size; i < num_items; i += max_cub_size) { + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr first_elem = allocator->allocate(sizeof(input_t)); + auto first_elem_ptr = reinterpret_cast(first_elem.get()); + + size_cub = std::min(num_items - i, max_cub_size); + impl::transform_vals<<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( + output + i - 1, + input + i, + first_elem_ptr, + scan_op); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::ExclusiveScan, + input + i + 1, + output + i, + scan_op, + ::at_cuda_detail::cub::FutureValue(first_elem_ptr), + size_cub, + at::cuda::getCurrentCUDAStream()); + } +#endif +} + +# if defined(CUDA_VERSION) || defined(USE_ROCM) + +template +struct BlockPrefixCallbackOp +{ + public: + T running_total; + + __host__ __device__ BlockPrefixCallbackOp(T running_total) : running_total(running_total) {} + + // Callback operator to be entered by the first warp of threads in the block. + // Thread-0 is responsible for returning a value for seeding the block-wide scan. + __host__ __device__ T operator()(T block_aggregate) + { + T old_prefix = running_total; + running_total += block_aggregate; + return old_prefix; + } +}; + +template +__global__ void final_scan_kernel(const T* d_in, T* d_out, T* agg, int64_t nelem, int iters_per_cta) { + int64_t offset = BLOCK_THREADS * ITEMS_PER_THREAD * iters_per_cta * (int64_t)blockIdx.x; + int64_t remaining = nelem - offset; + if (remaining <= 0) { + return; + } + + d_in += offset; + d_out += offset; + + using BlockLoadT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockLoad; + + // Specialize BlockStore type for our thread block (uses warp-striped loads for coalescing, then transposes in shared + // memory to a blocked arrangement) + using BlockStoreT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockStore; + + // Specialize BlockScan type for our thread block + using BlockScanT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockScan; + using BlockReduceT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockReduce; + + + // Shared memory + __shared__ union TempStorage + { + typename BlockLoadT::TempStorage load; + typename BlockStoreT::TempStorage store; + typename BlockScanT::TempStorage scan; + typename BlockReduceT::TempStorage reduce; + } temp_storage; + + // load agg and reduce my starting value + T agg_data; + agg_data = threadIdx.x >= blockIdx.x ? T(0) : agg[threadIdx.x]; + // if there are fewer threads than previous values to be read, + // read another value + if (threadIdx.x + blockDim.x < blockIdx.x) { + agg_data += agg[threadIdx.x + blockDim.x]; + } + T aggregate = BlockReduceT(temp_storage.reduce).Sum(agg_data); + __syncthreads(); + BlockPrefixCallbackOp prefix_op(aggregate); + + + // Per-thread tile data + T data[ITEMS_PER_THREAD]; + + for (int i=0; i= BLOCK_THREADS * ITEMS_PER_THREAD) { + BlockLoadT(temp_storage.load).Load(d_in, data); + } else { + #pragma unroll + for (int j=0; j= BLOCK_THREADS * ITEMS_PER_THREAD) { + BlockStoreT(temp_storage.store).Store(d_out, data); + } else { + BlockStoreT(temp_storage.store).Store(d_out, data, remaining); + } + d_in += BLOCK_THREADS * ITEMS_PER_THREAD; + d_out += BLOCK_THREADS * ITEMS_PER_THREAD; + remaining -= BLOCK_THREADS * ITEMS_PER_THREAD; + if (remaining <= 0) return; + __syncthreads(); + } + +} + +template +struct TransformFunctor { + __device__ aggT operator()(T value) const { + if constexpr (!nonzero) { + return value; + } else { + return (value != T(0)) ? 1 : 0; + } + } +}; + +template +__global__ void calc_block_sums(const T * d_in, aggT * agg, int64_t nelem, int iters_per_cta){ + int64_t offset = BLOCK_THREADS * ITEMS_PER_THREAD * iters_per_cta * (int64_t)blockIdx.x; + int64_t remaining = nelem - offset; + if (remaining <= 0) { + return; + } + d_in += offset; + + using BlockLoadT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockLoad; + using BlockReduceT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockReduce; + // Shared memory + __shared__ union TempStorage + { + typename BlockLoadT::TempStorage load; + typename BlockReduceT::TempStorage reduce; + } temp_storage; + aggT data[ITEMS_PER_THREAD]; + aggT agg_val = 0; + TransformFunctor transform_functor; + auto iter_in = ATEN_CUB_TRANSFORM_ITERATOR(aggT, TransformFunctor, const T*)(d_in, transform_functor); + for (int i=0; i= BLOCK_THREADS * ITEMS_PER_THREAD) { + BlockLoadT(temp_storage.load).Load(iter_in, data); + __syncthreads(); + agg_val += BlockReduceT(temp_storage.reduce).Sum(data); + + } else { + BlockLoadT(temp_storage.load).Load(iter_in, data, remaining, aggT(0)); + __syncthreads(); + agg_val += BlockReduceT(temp_storage.reduce).Sum(data); + } + iter_in += BLOCK_THREADS * ITEMS_PER_THREAD; + remaining -= BLOCK_THREADS * ITEMS_PER_THREAD; + if (remaining <= 0) { + // for nonzeros we need to write out last blocks + // accumulated value to be able to compute + // total number of nonzeros + if (nonzero && threadIdx.x == 0) { + agg[blockIdx.x] = agg_val; + } + return; + } + __syncthreads(); + + } + if (threadIdx.x == 0) { + agg[blockIdx.x] = agg_val; + } + +} + +template +struct NonZeroOp { + __host__ __device__ __forceinline__ int operator()(const T& a) const { + return (a != T(0)); + } +}; + +template +constexpr int block_threads(){ + if constexpr (size >=16) { + return 128; + } else if constexpr (size >=8) { + return 256; + } else { + return 512; + } +} + +template +inline void inclusive_deterministic_scan(const scalar_t * input, scalar_t * output, ScanOpT scan_op, int64_t num_items) { + static_assert(std::is_same_v>, ""); + constexpr int BLOCK_THREADS = block_threads(); + constexpr int ITEMS_PER_THREAD = 16; + auto grid_size = (num_items + BLOCK_THREADS * ITEMS_PER_THREAD - 1) / (BLOCK_THREADS * ITEMS_PER_THREAD); + const int64_t num_sms = at::cuda::getCurrentDeviceProperties()->multiProcessorCount; + + const int iters_per_cta = (grid_size + num_sms - 1)/num_sms; + grid_size = std::min(num_sms, grid_size); + // simple reduction in scan kernel handles at most 2 items per thread + TORCH_INTERNAL_ASSERT(2 * BLOCK_THREADS >= grid_size); + auto& allocator = *c10::cuda::CUDACachingAllocator::get(); + auto agg = allocator.allocate(grid_size * sizeof(scalar_t)); + calc_block_sums + <<>>( + input, (scalar_t*)agg.get(), num_items, iters_per_cta); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + final_scan_kernel + <<>>( + input, output, (scalar_t*)agg.get(), num_items, iters_per_cta); + C10_CUDA_KERNEL_LAUNCH_CHECK(); +} + +#endif + +template +inline void exclusive_scan(InputIteratorT input, OutputIteratorT output, ScanOpT scan_op, InitValueT init_value, int64_t num_items) { +#if defined(USE_ROCM) + //For ROCm, use hipCUB chained iterators + CUB_WRAPPER(NO_ROCM(detail)::hipcub::DeviceScan::ExclusiveScan, + input, + output, + scan_op, + init_value, + num_items, + at::cuda::getCurrentCUDAStream()); + C10_HIP_KERNEL_LAUNCH_CHECK(); +#else + // non synchronizing cub call + // even though cub is supposed to support tensors with int_max elements, in reality it doesn't, + // so split at int_max/2 + int size_cub = std::min(num_items, max_cub_size); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::ExclusiveScan, + input, + output, + scan_op, + init_value, + size_cub, + at::cuda::getCurrentCUDAStream()); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + for (int64_t i = max_cub_size; i < num_items; i += max_cub_size) { + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr first_elem = allocator->allocate(sizeof(InitValueT)); + auto first_elem_ptr = reinterpret_cast(first_elem.get()); + + size_cub = std::min(num_items - i, max_cub_size); + impl::transform_vals<<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( + output + i - 1, + input + i - 1, + first_elem_ptr, + scan_op); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::ExclusiveScan, + input + i, + output + i, + scan_op, + ::at_cuda_detail::cub::FutureValue(first_elem_ptr), + size_cub, + at::cuda::getCurrentCUDAStream()); + } +#endif +} + + +template +inline void inclusive_sum_by_key(KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub InclusiveSumByKey does not support more than INT_MAX elements"); +#if !defined(USE_ROCM) + CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey, + keys, input, output, num_items, NO_ROCM(::cuda)::std::equal_to<>(), at::cuda::getCurrentCUDAStream()); +#else + CUB_WRAPPER(cub::DeviceScan::InclusiveSumByKey, + keys, input, output, num_items, hipcub::Equality(), at::cuda::getCurrentCUDAStream()); +#endif +} + +template +inline void inclusive_scan_by_key(KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, ScanOpT scan_op, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub InclusiveSumByKey does not support more than INT_MAX elements"); +#if !defined(USE_ROCM) + CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveScanByKey, + keys, input, output, scan_op, num_items, NO_ROCM(::cuda)::std::equal_to<>(), at::cuda::getCurrentCUDAStream()); +#else + CUB_WRAPPER(cub::DeviceScan::InclusiveScanByKey, + keys, input, output, scan_op, num_items, hipcub::Equality(), at::cuda::getCurrentCUDAStream()); +#endif +} + + +template +void unique(InputIteratorT input, OutputIteratorT output, + NumSelectedIteratorT num_selected_out, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub unique does not support more than INT_MAX elements"); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSelect::Unique, + input, output, num_selected_out, num_items, at::cuda::getCurrentCUDAStream()); +} + +template +void run_length_encode(InputIteratorT input, OutputIteratorT output, CountsOutputIteratorT counts_out, + LengthOutputIteratorT length_out, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub run_length_encode does not support more than INT_MAX elements"); + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceRunLengthEncode::Encode, + input, output, counts_out, length_out, num_items, + at::cuda::getCurrentCUDAStream()); +} + +template +void reduce(InputIteratorT input, OutputIteratorT output, int64_t num_items, ReductionOpT op, T init) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub reduce does not support more than INT_MAX elements"); + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceReduce::Reduce, + input, output, num_items, op, init, + at::cuda::getCurrentCUDAStream()); + +} + +} // namespace at::cuda::cub + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.h new file mode 100644 index 0000000000000000000000000000000000000000..199e46b7464eed369a63b66a8e1026529218b724 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.h @@ -0,0 +1,98 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +// NOTE: These templates are intentionally not defined in this header, +// which avoids re-compiling them for each translation unit. If you get +// a link error, you need to add an explicit instantiation for your +// types in cub.cu + +namespace at::cuda::cub { + +inline int get_num_bits(uint64_t max_key) { + int num_bits = 1; + while (max_key > 1) { + max_key >>= 1; + num_bits++; + } + return num_bits; +} + +namespace detail { + +// radix_sort_pairs doesn't interact with value_t other than to copy +// the data, so we can save template instantiations by reinterpreting +// it as an opaque type. +// We use native integer types for 1/2/4/8-byte values to reduce +// register usage in CUDA kernels. For sizes > 8 fall back to char array. +template struct alignas(N) OpaqueType { char data[N]; }; +template <> struct alignas(1) OpaqueType<1> { uint8_t data; }; +template <> struct alignas(2) OpaqueType<2> { uint16_t data; }; +template <> struct alignas(4) OpaqueType<4> { uint32_t data; }; +template <> struct alignas(8) OpaqueType<8> { uint64_t data; }; + +template +void radix_sort_pairs_impl( + const key_t *keys_in, key_t *keys_out, + const OpaqueType *values_in, OpaqueType *values_out, + int64_t n, bool descending, int64_t begin_bit, int64_t end_bit); + +} // namespace detail + +template +void radix_sort_pairs( + const key_t *keys_in, key_t *keys_out, + const value_t *values_in, value_t *values_out, + int64_t n, bool descending=false, int64_t begin_bit=0, int64_t end_bit=sizeof(key_t)*8) { + static_assert(std::is_trivially_copyable_v || + AT_ROCM_ENABLED(), // ROCm incorrectly fails this check for vector types + "radix_sort_pairs value type must be trivially copyable"); + // Make value type opaque, so all inputs of a certain size use the same template instantiation + using opaque_t = detail::OpaqueType; + static_assert(sizeof(value_t) <= 8 && (sizeof(value_t) & (sizeof(value_t) - 1)) == 0, + "This size of value_t is not instantiated. Please instantiate it in cub.cu" + " and modify this check."); + static_assert(sizeof(value_t) == alignof(value_t), "Expected value_t to be size-aligned"); + detail::radix_sort_pairs_impl( + keys_in, keys_out, + reinterpret_cast(values_in), + reinterpret_cast(values_out), + n, descending, begin_bit, end_bit); +} + +template +void radix_sort_keys( + const key_t *keys_in, key_t *keys_out, + int64_t n, bool descending=false, int64_t begin_bit=0, int64_t end_bit=sizeof(key_t)*8); + +// NOTE: Intermediate sums will be truncated to input_t precision +template +void inclusive_sum_truncating(const input_t *input, output_t *output, int64_t n); + +template +void inclusive_sum(const scalar_t *input, scalar_t *output, int64_t n) { + return inclusive_sum_truncating(input, output, n); +} + +// NOTE: Sums are done is common_type +template +void exclusive_sum_in_common_type(const input_t *input, output_t *output, int64_t n); + +template +void exclusive_sum(const scalar_t *input, scalar_t *output, int64_t n) { + return exclusive_sum_in_common_type(input, output, n); +} + +void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n); +inline void mask_exclusive_sum(const bool *mask, int64_t *output_idx, int64_t n) { + return mask_exclusive_sum( + reinterpret_cast(mask), output_idx, n); +} + +} // namespace at::cuda::cub + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub_definitions.cuh b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub_definitions.cuh new file mode 100644 index 0000000000000000000000000000000000000000..563bd82bebb7375081ee35f5021edc5bb5e17e78 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/cub_definitions.cuh @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#if !defined(USE_ROCM) +#include // for CUDA_VERSION +#endif + +#if !defined(USE_ROCM) +#include +#else +#define CUB_VERSION 200001 +#endif + +// cub support for CUB_WRAPPED_NAMESPACE is added to cub 1.13.1 in: +// https://github.com/NVIDIA/cub/pull/326 +// CUB_WRAPPED_NAMESPACE is defined globally in cmake/Dependencies.cmake +// starting from CUDA 11.5 +#if defined(CUB_WRAPPED_NAMESPACE) || defined(THRUST_CUB_WRAPPED_NAMESPACE) +#define USE_GLOBAL_CUB_WRAPPED_NAMESPACE() true +#else +#define USE_GLOBAL_CUB_WRAPPED_NAMESPACE() false +#endif + +// There were many bc-breaking changes in major version release of CCCL v3.0.0 +// Please see https://nvidia.github.io/cccl/cccl/3.0_migration_guide.html +#if CUB_VERSION >= 200800 +#define CUB_V3_PLUS() true +#else +#define CUB_V3_PLUS() false +#endif + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator.h new file mode 100644 index 0000000000000000000000000000000000000000..7cb786a3c1ab5bfb8150f3211cbdac61bd3cc9c1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#if AT_USE_JITERATOR() + +#include +#include +#include + +#include +#include + +namespace at::cuda { + +TORCH_CUDA_CPP_API c10::SmallVector CompileAndLaunchKernel( + const std::string& code_string, + const std::string& kernel_name, + const int num_outputs, + const c10::SmallVector& tensors, + const c10::SmallVector& extra_args, + bool return_by_ref); + +} // namespace at::cuda + +#else + +namespace at::cuda { + +TORCH_CUDA_CPP_API c10::SmallVector CompileAndLaunchKernel( + const std::string& code_string, + const std::string& kernel_name, + const int num_outputs, + const c10::SmallVector& tensors, + const c10::SmallVector& extra_args, + bool return_by_ref) { + TORCH_CHECK(false, "Jiterator is not supported"); + } +} // namespace at::cuda + +#endif // AT_USE_JITERATOR() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator_impl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..acdcba30dc6a5f64f5b066c2f0795f251ca53f86 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator_impl.h @@ -0,0 +1,255 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#if AT_USE_JITERATOR() + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace at::native { + + +#define AT_FOR_8_CASES(_) \ + _(1) \ + _(2) \ + _(3) \ + _(4) \ + _(5) \ + _(6) \ + _(7) \ + _(8) + +#define AT_FOR_8_CASES_WITH_COMMA(_) \ + _(1) , \ + _(2) , \ + _(3) , \ + _(4) , \ + _(5) , \ + _(6) , \ + _(7) , \ + _(8) + +c10::SmallVector get_extra_args_typenames(const c10::SmallVector& extra_args) { + c10::SmallVector args_typenames(extra_args.size()); + for (const auto i : c10::irange(extra_args.size())) { + args_typenames[i] = at::cuda::jit::typeName(extra_args[i].type()); + } + return args_typenames; +} + +int can_vectorize_up_to(at::ScalarType type, char* pointer) { + switch(type) { +#define DEFINE_CASE(ctype, scalartype) \ + case ScalarType::scalartype : return memory::can_vectorize_up_to(pointer); + + AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CASE) +#undef DEFINE_CASE + + default: TORCH_INTERNAL_ASSERT(false, "Unrecognized ScalarType: ", type); + } +} + +// jitted version of the above +// See Note [Jiterator], this relies on the assumptions enumerated there +int jitted_can_vectorize_up_to(const TensorIteratorBase& iter) { + const at::ScalarType common_dtype = iter.common_dtype(); + const at::ScalarType result_dtype = common_dtype; + + // Deals with output + int result = can_vectorize_up_to(result_dtype, static_cast(iter.data_ptr(0))); + + // Incorporates input(s) + for (auto i = 1; i < iter.ntensors(); ++i) { + result = std::min(result, can_vectorize_up_to(common_dtype, static_cast(iter.data_ptr(i)))); + } + + return result; +} + +template +static std::unique_ptr> make_unique_offset_calculator( + const TensorIteratorBase& iter) { + // array size can not be 0, this happens when N == 0 + constexpr int array_size = std::max(N, 1); + TORCH_INTERNAL_ASSERT(N == (IS_INPUT ? iter.ninputs() : iter.noutputs())); + + std::array strides; + int64_t element_sizes[array_size]; + for (int i = 0; i < N; i++) { + int index = IS_INPUT ? i + iter.noutputs() : i; + strides[i] = iter.strides(index).data(); + element_sizes[i] = iter.element_size(index); + } + return std::make_unique>(iter.ndim(), iter.shape().data(), strides.data(), element_sizes); +} + +template +struct OffsetCalculatorVariant { +#define DEFINE_CASE(index) std::unique_ptr> + using OffsetCalculatorTypes = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + OffsetCalculatorVariant(const TensorIteratorBase& iter) { + int num = IS_INPUT ? iter.ninputs() : iter.noutputs(); + + switch(num) { +#define DEFINE_CASE(index) \ + case index : v = make_unique_offset_calculator(iter); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + default: + TORCH_CHECK(false, "OffsetCalculatorVariant is not implemented for num_tensor = ", num); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(v.get()); }, v); + } + + private: + OffsetCalculatorTypes v{}; +}; + +struct ArrayVariant { +// works for up to 8 input + 8 outputs +#define DEFINE_CASE(index) std::array, std::array + using ArrayTypes = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + ArrayVariant(const TensorIteratorBase& iter) { + int ntensors = iter.ntensors(); + switch(ntensors) { +#define DEFINE_CASE(index) \ + case index: array = std::array{}; break; \ + case index+8: array = std::array{}; break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "ArrayVariant is not implemented for ntensors = ", ntensors); + } + + std::visit([&](auto& a) { + for (auto i = 0; i < ntensors; ++i) { + a[i] = (char*)iter.data_ptr(i); + } + }, array); + } + + void* data_ptr() { + return std::visit([](auto & a){ return static_cast(&a); }, array); + } + +private: + ArrayTypes array; +}; + +struct TrivialOffsetCalculatorVariant { +#define DEFINE_CASE(index) TrivialOffsetCalculator + using TrivialOffsetCalculatorTypes = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + TrivialOffsetCalculatorVariant(int num) { + switch(num) { +#define DEFINE_CASE(index) \ + case index: v = TrivialOffsetCalculator(); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "TrivialOffsetCalculatorVariant is not implemented for num_tensors = ", num); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(&v); }, v); + } + +private: + TrivialOffsetCalculatorTypes v{}; +}; + +struct LoadWithCastVariant { +#define DEFINE_CASE(index) std::unique_ptr> + using LoadWithCastPtr = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + LoadWithCastVariant(const TensorIteratorBase& iter) { + int arity = iter.ninputs(); + switch(arity) { +#define DEFINE_CASE(index) \ + case index: v = std::make_unique>(iter); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "LoadWithCastVariant is not implemented for ninputs = ", arity); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(v.get()); }, v); + } + +private: + LoadWithCastPtr v{}; +}; + +struct StoreWithCastVariant { +#define DEFINE_CASE(index) std::unique_ptr> + using StoreWithCastPtr = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + StoreWithCastVariant(const TensorIteratorBase& iter) { + int num = iter.noutputs(); + switch(num) { +#define DEFINE_CASE(index) \ + case index: v = std::make_unique>(iter); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "StoreWithCastVariant is not implemented for noutputs = ", num); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(v.get()); }, v); + } + +private: + StoreWithCastPtr v{}; +}; + +} // namespace at::native + + +#endif // AT_USE_JITERATOR() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/llvm_jit_strings.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/llvm_jit_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..fb802616b8a8aeb8bd4b398e665e24470c2b674b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cuda/llvm_jit_strings.h @@ -0,0 +1,19 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::cuda { + +TORCH_CUDA_CPP_API const std::string &get_traits_string(); +TORCH_CUDA_CPP_API const std::string &get_cmath_string(); +TORCH_CUDA_CPP_API const std::string &get_complex_body_string(); +TORCH_CUDA_CPP_API const std::string &get_complex_half_body_string(); +TORCH_CUDA_CPP_API const std::string &get_complex_math_string(); + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Descriptors.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Descriptors.h new file mode 100644 index 0000000000000000000000000000000000000000..ab53d20eee3d9cb4e942f86ac4d26a3c8ab309b7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Descriptors.h @@ -0,0 +1,415 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#else +#include +#endif + +#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 8907 +#define USE_CUDNN_RNN_V8_API +#endif + +namespace at::native { + +std::string cudnnTypeToString(cudnnDataType_t dtype); + +// TODO: Add constructors for all of the descriptors + +inline int dataSize(cudnnDataType_t dataType) +{ + switch (dataType) { + case CUDNN_DATA_BFLOAT16: + case CUDNN_DATA_HALF: return 2; + case CUDNN_DATA_FLOAT: return 4; + default: return 8; + } +} + +// NOTE [ cudnn fixSizeOneDimStride ] +// The stride for a size-1 dimensions is not uniquely determined; in +// fact, it can be anything you want, because the fact that the +// tensor is size 1 at this dimension means that you will never actually +// try advancing your pointer by this stride. +// +// However, CuDNN has a much more stringent requirement on strides: +// if you are passing a contiguous input, it better be the case +// that the stride for dim i is the product of the sizes of dims +// i+1 to the end. This stride is indeed uniquely determined. This +// function modifies 'stride' in place so this invariant holds. +template +static inline void fixSizeOneDimStride(int dim, const T *size, T *stride, bool nhwc) { + int64_t z = 1; + int index = 0; + std::vector permutation(dim); + + if (nhwc) { + permutation[index++] = 1; + } + for (int d = dim-1; d > 1; d--) { + permutation[index++] = d; + } + if (!nhwc) { + permutation[index++] = 1; + } + permutation[index++] = 0; + for (int d : permutation) { + if (size[d] == 1) { + stride[d] = z; + } else { + z *= size[d]; + } + } +} + +template +struct DescriptorDeleter { + void operator()(T* x) { + if (x != nullptr) { + AT_CUDNN_CHECK(dtor(x)); + } + } +}; + +// A generic class for wrapping cuDNN descriptor types. All you need +// is to give the underlying type the Descriptor_t points to (usually, +// if it's cudnnTensorDescriptor_t it points to cudnnTensorStruct), +// the constructor and the destructor. Subclasses are responsible +// for defining a set() function to actually set the descriptor. +// +// Descriptors default construct to a nullptr, and have a descriptor +// initialized the first time you call set() or any other initializing +// function. +template +// NOLINTNEXTLINE(bugprone-exception-escape) +class TORCH_CUDA_CPP_API Descriptor { + public: + // TODO: Figure out why const-correctness doesn't work here + + // Use desc() to access the underlying descriptor pointer in + // a read-only fashion. Most client code should use this. + // If the descriptor was never initialized, this will return + // nullptr. + T* desc() const { return desc_.get(); } + T* desc() { return desc_.get(); } + + // Use mut_desc() to access the underlying descriptor pointer + // if you intend to modify what it points to (e.g., using + // cudnnSetFooDescriptor). This will ensure that the descriptor + // is initialized. Code in this file will use this function. + T* mut_desc() { init(); return desc_.get(); } +protected: + void init() { + if (desc_ == nullptr) { + T* raw_desc = nullptr; + AT_CUDNN_CHECK(ctor(&raw_desc)); + desc_.reset(raw_desc); + } + } +private: + std::unique_ptr> desc_; +}; + +class TORCH_CUDA_CPP_API RNNDataDescriptor : public Descriptor< + cudnnRNNDataStruct, + &cudnnCreateRNNDataDescriptor, + &cudnnDestroyRNNDataDescriptor> { +public: + void set(const at::Tensor &t, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int* seqLengthArray); +private: + void set(cudnnDataType_t dataType, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int* seqLengthArray) { + AT_CUDNN_CHECK(cudnnSetRNNDataDescriptor(mut_desc(), dataType, layout, maxSeqLength, batchSize, vectorSize, seqLengthArray, nullptr)); + } +}; + +class TORCH_CUDA_CPP_API TensorDescriptor : public Descriptor< + cudnnTensorStruct, + &cudnnCreateTensorDescriptor, + &cudnnDestroyTensorDescriptor> { + public: + TensorDescriptor() = default; + explicit TensorDescriptor(const at::Tensor &t, size_t pad = 0) { + set(t, pad); + } + + // Note [CuDNN broadcast padding] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // pad specifies the minimum dimensionality of the tensor descriptor + // we produce (it doesn't have anything to do with, e.g., convolution + // padding). If 't' is lower-dimensional than 'pad', the remaining + // dimensions (on the right) are padded with ones. This doesn't + // affect the underlying data layout. This is particularly useful for + // dealing with a peculiarity of the CuDNN API, which is that broadcasting in CuDNN is + // done in two steps: first, the client code is expected to pad out + // (the dimensions) input tensors to be the same dimension as the + // target broadcast, and then second, CuDNN takes of actually + // broadcasting size 1 dimensions. + + void set(const at::Tensor &t, size_t pad = 0); + void set(const at::Tensor &t, at::MemoryFormat memory_format, size_t pad = 0); + void set(cudnnDataType_t dataType, IntArrayRef sizes, IntArrayRef strides, size_t pad = 0); + + void print(); + +private: + void set(cudnnDataType_t dataType, IntArrayRef sizes, IntArrayRef strides, size_t pad, bool nhwc); + + void set(cudnnDataType_t dataType, int dim, int* size, int* stride, bool nhwc) { + std::vector strides_copy(stride, stride + dim); + fixSizeOneDimStride(dim, size, strides_copy.data(), nhwc); + AT_CUDNN_CHECK(cudnnSetTensorNdDescriptor(mut_desc(), dataType, dim, size, strides_copy.data())); + } +}; + +std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d); + +class TORCH_CUDA_CPP_API FilterDescriptor : public Descriptor< + cudnnFilterStruct, + &cudnnCreateFilterDescriptor, + &cudnnDestroyFilterDescriptor> { + public: + void set(const at::Tensor &t, int64_t pad = 0) { + set(t, at::MemoryFormat::Contiguous, pad); + } + + void set(const at::Tensor &t, const at::MemoryFormat memory_format, int64_t pad = 0); + + void print(); +private: + void set(cudnnDataType_t dataType, int dim, int* size, cudnnTensorFormat_t filter_format) { + AT_CUDNN_CHECK(cudnnSetFilterNdDescriptor(mut_desc(), dataType, filter_format, dim, size)); + } +}; + +std::ostream& operator<<(std::ostream & out, const FilterDescriptor& d); + +struct TORCH_CUDA_CPP_API ConvolutionDescriptor + : public Descriptor< + cudnnConvolutionStruct, + &cudnnCreateConvolutionDescriptor, + &cudnnDestroyConvolutionDescriptor> { + void set(cudnnDataType_t dataType, int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups, bool allow_tf32) { + cudnnDataType_t mathType = dataType; + if (dataType == CUDNN_DATA_HALF) mathType = CUDNN_DATA_FLOAT; + AT_CUDNN_CHECK(cudnnSetConvolutionNdDescriptor(mut_desc(), dim, pad, stride, upscale, + CUDNN_CROSS_CORRELATION, mathType)); + AT_CUDNN_CHECK(cudnnSetConvolutionGroupCount(mut_desc(), groups)); + // See Note [behavior of cudnnFind and cudnnGet] + AT_CUDNN_CHECK(cudnnSetConvolutionMathType(mut_desc(), CUDNN_DEFAULT_MATH)); + if(dataType == CUDNN_DATA_HALF) { + AT_CUDNN_CHECK(cudnnSetConvolutionMathType(mut_desc(), CUDNN_TENSOR_OP_MATH)); + } else if (dataType == CUDNN_DATA_FLOAT && !allow_tf32) { + AT_CUDNN_CHECK(cudnnSetConvolutionMathType(mut_desc(), CUDNN_FMA_MATH)); + } + } +}; + +struct TORCH_CUDA_CPP_API SpatialTransformerDescriptor + : public Descriptor< + cudnnSpatialTransformerStruct, + &cudnnCreateSpatialTransformerDescriptor, + &cudnnDestroySpatialTransformerDescriptor> { + void set(cudnnDataType_t dataType, int dim, int* size) { + AT_CUDNN_CHECK(cudnnSetSpatialTransformerNdDescriptor(mut_desc(), CUDNN_SAMPLER_BILINEAR, dataType, dim, size)); + } +}; + +// NOLINTNEXTLINE(bugprone-exception-escape) +struct TORCH_CUDA_CPP_API DropoutDescriptor + : public Descriptor< + cudnnDropoutStruct, + &cudnnCreateDropoutDescriptor, + &cudnnDestroyDropoutDescriptor> { + at::Tensor state; + + // Initialize a dropout descriptor's RNG state. + // WARNING: This function is very expensive, avoid calling this function! + void initialize_rng(cudnnHandle_t handle, float dropout, long long int seed, const TensorOptions& options) { + TORCH_INTERNAL_ASSERT(dropout > 0, "dropout must be nonzero; otherwise call set_no_dropout"); + size_t state_size = 0; + AT_CUDNN_CHECK(cudnnDropoutGetStatesSize(handle, &state_size)); + AT_ASSERT(options.device().type() == kCUDA); + AT_ASSERT(options.dtype() == kByte); + state = at::empty({static_cast(state_size)}, options); + AT_CUDNN_CHECK(cudnnSetDropoutDescriptor(mut_desc(), handle, dropout, state.data_ptr(), state_size, seed)); + } + + // Restore a dropout descriptor given a dropout probability and existing RNG state. + void set(cudnnHandle_t handle, float dropout, const at::Tensor& state) { + TORCH_INTERNAL_ASSERT(dropout > 0, "dropout must be nonzero; otherwise call set_no_dropout"); + void *state_ptr = state.data_ptr(); + size_t state_size = state.size(0); + // NB: The seed doesn't actually matter, so we give a dummy value + AT_CUDNN_CHECK(cudnnRestoreDropoutDescriptor(mut_desc(), handle, dropout, state_ptr, state_size, 0 /* seed */)); + } + + // Restore a dropout descriptor corresponding to no dropout + void set_no_dropout(cudnnHandle_t handle) { + // NB: seed doesn't matter when dropout = 0, because no random number + // initialization actually takes place when there is no dropout. + // NB: Empirically, cudnnSetDropoutDescriptor is cheap when + // dropout == 0 + AT_CUDNN_CHECK(cudnnSetDropoutDescriptor(mut_desc(), handle, 0 /* dropout */, nullptr, 0 /* state_size */, 0 /* seed */)); + } +}; + +struct TORCH_CUDA_CPP_API RNNDescriptor : public Descriptor< + cudnnRNNStruct, + &cudnnCreateRNNDescriptor, + &cudnnDestroyRNNDescriptor> { + DropoutDescriptor dropout_desc_; + void set(cudnnHandle_t handle, +#ifdef USE_CUDNN_RNN_V8_API + int input_size, + bool packed, +#endif + int hidden_size, int proj_size, int num_layers, DropoutDescriptor&& dropout_desc, + cudnnRNNInputMode_t input_mode, cudnnDirectionMode_t bidirectional, + cudnnRNNMode_t mode, cudnnDataType_t datatype, cudnnDataType_t input_type, cudnnRNNAlgo_t algo, bool allow_tf32) { + dropout_desc_ = std::move(dropout_desc); +#ifndef USE_CUDNN_RNN_V8_API + AT_CUDNN_CHECK(cudnnSetRNNDescriptor_v6( + handle, + mut_desc(), + hidden_size, + num_layers, + dropout_desc_.desc(), + input_mode, + bidirectional, + mode, + algo, + datatype)); + if (proj_size != 0) { + AT_CUDNN_CHECK(cudnnSetRNNProjectionLayers( + handle, + /*rnnDesc=*/mut_desc(), + /*recProjSize=*/proj_size, + /*outProjSize=*/0)); + } + cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties(); + if (prop->major >= 7) { + if (input_type == CUDNN_DATA_HALF) { + cudnnSetRNNMatrixMathType(mut_desc(), CUDNN_TENSOR_OP_MATH); + } + else if (input_type == CUDNN_DATA_FLOAT && !allow_tf32) { + cudnnSetRNNMatrixMathType(mut_desc(), CUDNN_FMA_MATH); + } + else { + // Technically, as the default it's not necessary to explicitly + // set this. + cudnnSetRNNMatrixMathType(mut_desc(), CUDNN_DEFAULT_MATH); + } + } +#else + cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties(); + auto math_type = CUDNN_DEFAULT_MATH; + if (prop->major >= 7) { + if (input_type == CUDNN_DATA_HALF) { + math_type = CUDNN_TENSOR_OP_MATH; + } else if (!allow_tf32) { + math_type = CUDNN_FMA_MATH; + } + } + AT_CUDNN_CHECK(cudnnSetRNNDescriptor_v8( + mut_desc(), + algo, + mode, + CUDNN_RNN_DOUBLE_BIAS, + bidirectional, + input_mode, + input_type, + datatype, + math_type, + input_size, + hidden_size, + proj_size ? proj_size : hidden_size, + num_layers, + dropout_desc_.desc(), + packed ? CUDNN_RNN_PADDED_IO_DISABLED : CUDNN_RNN_PADDED_IO_ENABLED)); +#endif + } +}; + +struct TORCH_CUDA_CPP_API CTCLossDescriptor + : public Descriptor< + cudnnCTCLossStruct, + &cudnnCreateCTCLossDescriptor, + &cudnnDestroyCTCLossDescriptor> { + void set(cudnnDataType_t datatype) { + AT_CUDNN_CHECK(cudnnSetCTCLossDescriptor(mut_desc(), datatype)); + } + void setEx( + cudnnDataType_t datatype, + cudnnLossNormalizationMode_t normMode, + cudnnNanPropagation_t gradMode) { + AT_CUDNN_CHECK( + cudnnSetCTCLossDescriptorEx(mut_desc(), datatype, normMode, gradMode)); + } + void set_v8_v9( + cudnnDataType_t datatype, + cudnnLossNormalizationMode_t normMode, + cudnnNanPropagation_t gradMode, + int maxLabelLength) { +#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 90000 + auto gradModev9 = CUDNN_CTC_ZERO_OOB_GRADIENTS; + if (gradMode == cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN) { + gradModev9 = CUDNN_CTC_SKIP_OOB_GRADIENTS; + } + AT_CUDNN_CHECK( + cudnnSetCTCLossDescriptor_v9(mut_desc(), datatype, normMode, gradModev9, maxLabelLength)); +#else + AT_CUDNN_CHECK( + cudnnSetCTCLossDescriptor_v8(mut_desc(), datatype, normMode, gradMode, maxLabelLength)); +#endif + } + +}; + +struct TORCH_CUDA_CPP_API ActivationDescriptor + : public Descriptor< + cudnnActivationStruct, + &cudnnCreateActivationDescriptor, + &cudnnDestroyActivationDescriptor> { + void set(cudnnActivationMode_t mode) { + AT_ASSERT( + mode == CUDNN_ACTIVATION_RELU, + "TODO: support more cuDNN activation modes"); + AT_CUDNN_CHECK(cudnnSetActivationDescriptor( + mut_desc(), + mode, + cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN, + std::numeric_limits::max())); + } +}; + +union Constant +{ + float f; + double d; + Constant(cudnnDataType_t dataType, double value) { + if (dataType == CUDNN_DATA_HALF || dataType == CUDNN_DATA_FLOAT) { + f = static_cast(value); + } else { + d = value; + } + } +}; + +} // namespace + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handle.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handle.h new file mode 100644 index 0000000000000000000000000000000000000000..e9a63546fc601d365fafc562b73af83e721ebd09 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handle.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::native { + +TORCH_CUDA_CPP_API cudnnHandle_t getCudnnHandle(); +} // namespace at::native + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handles.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handles.h new file mode 100644 index 0000000000000000000000000000000000000000..af02865d70cd1a87c8e15dc27c13a62075cf9c08 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handles.h @@ -0,0 +1,7 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Types.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Types.h new file mode 100644 index 0000000000000000000000000000000000000000..c02eb65e04c0daa19c51b7ed217bd9684f042047 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Types.h @@ -0,0 +1,19 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::native { + +TORCH_CUDA_CPP_API cudnnDataType_t +getCudnnDataTypeFromScalarType(const at::ScalarType dtype); +cudnnDataType_t getCudnnDataType(const at::Tensor& tensor); + +int64_t cudnn_version(); + +} // namespace at::native + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Utils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Utils.h new file mode 100644 index 0000000000000000000000000000000000000000..25f70509eb5289ec8bfc4cf5545565dd5dda9d48 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/Utils.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace at::native { + +// cuDNN has a buggy check for tensor being contiguous (that is, it does +// not ignore stride for dimension that is equal to 0). This function +// makes tensors which have zero stride contiguous, by setting the +// strides to 1 as cuDNN likes. +inline Tensor contiguousIfZeroInStrides(const Tensor& t) { + for (auto s : t.strides()) { + if (s == 0) + return t.contiguous(); + } + return t; +} + +} // namespace at::native + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/cudnn-wrapper.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/cudnn-wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..d291f5a778a53b678406ffaff59b86b15543b4e8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/cudnn/cudnn-wrapper.h @@ -0,0 +1,21 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#define STRINGIFY(x) #x +#define STRING(x) STRINGIFY(x) + +#if CUDNN_MAJOR < 8 || (CUDNN_MAJOR == 8 && CUDNN_MINOR < 5) +#pragma message("CuDNN v" STRING( \ + CUDNN_MAJOR) " found, but need at least CuDNN v8. You can get the latest version of CuDNN from https://developer.nvidia.com/cudnn or disable CuDNN with USE_CUDNN=0") +#pragma message "We strongly encourage you to move to 8.5 and above." +#pragma message "This message is intended to annoy you enough to update." +#endif + +#undef STRINGIFY +#undef STRING + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/AcceleratorHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/AcceleratorHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..6eb86b68bb755d4016e68aac8c801c72c27d72ab --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/AcceleratorHooksInterface.h @@ -0,0 +1,101 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +// AcceleratorHooksInterface is a shared interface provided by all +// accelerators to allow generic code. +// This interface is hook-based as it corresponds to all the functions +// that are going to be called in a generic way from the CPU code. + +struct TORCH_API AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + virtual ~AcceleratorHooksInterface() = default; + + // Whether this backend was enabled at compilation time. + // This function should NEVER throw. + virtual bool isBuilt() const { + return false; + } + + // Whether this backend can be used at runtime, meaning it was built, + // its runtime dependencies are available (driver) and at least one + // supported device can be used. + // This function should NEVER throw. This function should NOT initialize the context + // on any device (result of hasPrimaryContext below should not change). + // While it is acceptable for this function to poison fork, it is + // recommended to avoid doing so whenever possible. + virtual bool isAvailable() const { + return false; + } + + // Whether the device at device_index is fully initialized or not. + virtual bool hasPrimaryContext(DeviceIndex device_index) const = 0; + + virtual void init() const { + TORCH_CHECK(false, "Backend doesn`t support init()"); + } + + virtual DeviceIndex deviceCount() const { + return 0; + } + + virtual void setCurrentDevice(DeviceIndex device) const { + TORCH_CHECK(false, "Backend doesn't support setCurrentDevice()"); + } + + virtual DeviceIndex getCurrentDevice() const { + TORCH_CHECK(false, "Backend doesn't support getCurrentDevice()"); + return -1; + } + + virtual DeviceIndex exchangeDevice(DeviceIndex device) const { + TORCH_CHECK(false, "Backend doesn't support exchangeDevice()"); + return -1; + } + + virtual DeviceIndex maybeExchangeDevice(DeviceIndex device) const { + TORCH_CHECK(false, "Backend doesn't support maybeExchangeDevice()"); + return -1; + } + + virtual bool isPinnedPtr(const void* data) const { + return false; + } + + virtual Allocator* getPinnedMemoryAllocator() const { + TORCH_CHECK(false, "Backend doesn't support getPinnedMemoryAllocator()"); + return nullptr; + } + + virtual Device getDeviceFromPtr(void* data) const { + TORCH_CHECK(false, "Backend doesn't support getDeviceFromPtr()"); + } + + virtual const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const { + TORCH_CHECK(false, "Backend doesn`t support getDefaultGenerator()"); + } + + virtual Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const { + TORCH_CHECK(false, "Backend doesn`t support getNewGenerator()"); + } +}; + +} // namespace at + +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/CUDAHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/CUDAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..4a1dd2fb1b0839e8424735a8c312a83a7ef10f24 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/CUDAHooksInterface.h @@ -0,0 +1,249 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +// NB: Class must live in `at` due to limitations of Registry.h. +namespace at { + +// Forward-declares at::cuda::NVRTC +namespace cuda { +struct NVRTC; +} // namespace cuda + +#ifdef _MSC_VER +constexpr const char* CUDA_HELP = + "PyTorch splits its backend into two shared libraries: a CPU library " + "and a CUDA library; this error has occurred because you are trying " + "to use some CUDA functionality, but the CUDA library has not been " + "loaded by the dynamic linker for some reason. The CUDA library MUST " + "be loaded, EVEN IF you don't directly use any symbols from the CUDA library! " + "One common culprit is a lack of -INCLUDE:?warp_size@cuda@at@@YAHXZ " + "in your link arguments; many dynamic linkers will delete dynamic library " + "dependencies if you don't depend on any of their symbols. You can check " + "if this has occurred by using link on your binary to see if there is a " + "dependency on *_cuda.dll library."; +#else +constexpr const char* CUDA_HELP = + "PyTorch splits its backend into two shared libraries: a CPU library " + "and a CUDA library; this error has occurred because you are trying " + "to use some CUDA functionality, but the CUDA library has not been " + "loaded by the dynamic linker for some reason. The CUDA library MUST " + "be loaded, EVEN IF you don't directly use any symbols from the CUDA library! " + "One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many " + "dynamic linkers will delete dynamic library dependencies if you don't " + "depend on any of their symbols. You can check if this has occurred by " + "using ldd on your binary to see if there is a dependency on *_cuda.so " + "library."; +#endif + +// The CUDAHooksInterface is an omnibus interface for any CUDA functionality +// which we may want to call into from CPU code (and thus must be dynamically +// dispatched, to allow for separate compilation of CUDA code). How do I +// decide if a function should live in this class? There are two tests: +// +// 1. Does the *implementation* of this function require linking against +// CUDA libraries? +// +// 2. Is this function *called* from non-CUDA ATen code? +// +// (2) should filter out many ostensible use-cases, since many times a CUDA +// function provided by ATen is only really ever used by actual CUDA code. +// +// TODO: Consider putting the stub definitions in another class, so that one +// never forgets to implement each virtual function in the real implementation +// in CUDAHooks. This probably doesn't buy us much though. +struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + ~CUDAHooksInterface() override = default; + + // Initialize THCState and, transitively, the CUDA state + void init() const override { + TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, + "Cannot get default CUDA generator without ATen_cuda library. ", + CUDA_HELP); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, + "Cannot get CUDA generator without ATen_cuda library. ", + CUDA_HELP); + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK(false, "Cannot get device of pointer on CUDA without ATen_cuda library. ", CUDA_HELP); + } + + bool isPinnedPtr(const void* /*data*/) const override { + return false; + } + + virtual bool hasCUDA() const { + return false; + } + + virtual bool hasCUDART() const { + return false; + } + + virtual bool hasMAGMA() const { + return false; + } + + virtual bool hasCuDNN() const { + return false; + } + + virtual bool hasCuSOLVER() const { + return false; + } + + virtual bool hasCuBLASLt() const { + return false; + } + + virtual bool hasROCM() const { + return false; + } + + virtual bool hasCKSDPA() const { + return false; + } + + virtual bool hasCKGEMM() const { + return false; + } + + virtual const at::cuda::NVRTC& nvrtc() const { + TORCH_CHECK(false, "NVRTC requires CUDA. ", CUDA_HELP); + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + TORCH_CHECK(false, "Cannot call hasPrimaryContext(", device_index, ") without ATen_cuda library. ", CUDA_HELP); + } + + virtual DeviceIndex current_device() const { + return -1; + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Pinned memory requires CUDA. ", CUDA_HELP); + } + + virtual Allocator* getCUDADeviceAllocator() const { + TORCH_CHECK(false, "CUDADeviceAllocator requires CUDA. ", CUDA_HELP); + } + + virtual bool compiledWithCuDNN() const { + return false; + } + + virtual bool compiledWithMIOpen() const { + return false; + } + + virtual bool supportsDilatedConvolutionWithCuDNN() const { + return false; + } + + virtual bool supportsDepthwiseConvolutionWithCuDNN() const { + return false; + } + + virtual bool supportsBFloat16ConvolutionWithCuDNNv8() const { + return false; + } + + virtual bool supportsBFloat16RNNWithCuDNN() const { + return false; + } + + virtual long versionCuDNN() const { + TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionRuntimeCuDNN() const { + TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionCuDNNFrontend() const { + TORCH_CHECK(false, "Cannot query cuDNN Frontend version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionMIOpen() const { + TORCH_CHECK(false, "Cannot query MIOpen version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionCUDART() const { + TORCH_CHECK(false, "Cannot query CUDART version without ATen_cuda library. ", CUDA_HELP); + } + + virtual std::string showConfig() const { + TORCH_CHECK(false, "Cannot query detailed CUDA version without ATen_cuda library. ", CUDA_HELP); + } + + virtual double batchnormMinEpsilonCuDNN() const { + TORCH_CHECK(false, + "Cannot query batchnormMinEpsilonCuDNN() without ATen_cuda library. ", CUDA_HELP); + } + + virtual int64_t cuFFTGetPlanCacheMaxSize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP); + } + + virtual void cuFFTSetPlanCacheMaxSize(DeviceIndex /*device_index*/, int64_t /*max_size*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP); + } + + virtual int64_t cuFFTGetPlanCacheSize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP); + } + + virtual void cuFFTClearPlanCache(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP); + } + + virtual int getNumGPUs() const { + return 0; + } + +#ifdef USE_ROCM + virtual bool isGPUArch(const std::vector& /*archs*/, DeviceIndex = -1 /*device_index*/) const { + TORCH_CHECK(false, "Cannot check GPU arch without ATen_cuda library. ", CUDA_HELP); + } +#endif + + virtual void deviceSynchronize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot synchronize CUDA device without ATen_cuda library. ", CUDA_HELP); + } +}; + +// NB: dummy argument to suppress "ISO C++11 requires at least one argument +// for the "..." in a variadic macro" +struct TORCH_API CUDAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs); +#define REGISTER_CUDA_HOOKS(clsname) \ + C10_REGISTER_CLASS(CUDAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const CUDAHooksInterface& getCUDAHooks(); +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/FunctionTraits.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/FunctionTraits.h new file mode 100644 index 0000000000000000000000000000000000000000..50f8d2bea0c3f921cbd304ad0116d64d29fa0db1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/FunctionTraits.h @@ -0,0 +1,108 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +// Modified from https://stackoverflow.com/questions/7943525/is-it-possible-to-figure-out-the-parameter-type-and-return-type-of-a-lambda + +// Fallback, anything with an operator() +template +struct function_traits : public function_traits { +}; + +// Pointers to class members that are themselves functors. +// For example, in the following code: +// template +// struct S { +// func_t f; +// }; +// template +// S make_s(func_t f) { +// return S { .f = f }; +// } +// +// auto s = make_s([] (int, float) -> double { /* ... */ }); +// +// function_traits traits; +template +struct function_traits : public function_traits { +}; + +// Const class member functions +template +struct function_traits : public function_traits { +}; + +// Reference types +template +struct function_traits : public function_traits {}; +template +struct function_traits : public function_traits {}; + +// Free functions +template +struct function_traits { + // arity is the number of arguments. + enum { arity = sizeof...(Args) }; + + using ArgsTuple = std::tuple; + using result_type = ReturnType; + + template + struct arg + { + using type = typename std::tuple_element>::type; + // the i-th argument is equivalent to the i-th tuple element of a tuple + // composed of those arguments. + }; +}; + +template +struct nullary_function_traits { + using traits = function_traits; + using result_type = typename traits::result_type; +}; + +template +struct unary_function_traits { + using traits = function_traits; + using result_type = typename traits::result_type; + using arg1_t = typename traits::template arg<0>::type; +}; + +template +struct binary_function_traits { + using traits = function_traits; + using result_type = typename traits::result_type; + using arg1_t = typename traits::template arg<0>::type; + using arg2_t = typename traits::template arg<1>::type; +}; + + +// Traits for calling with c10::guts::invoke, where member_functions have a first argument of ClassType +template +struct invoke_traits : public function_traits{ +}; + +template +struct invoke_traits : public invoke_traits{ +}; + +template +struct invoke_traits : public invoke_traits{ +}; + +template +struct invoke_traits : + public function_traits { +}; + +template +struct invoke_traits : + public function_traits { +}; + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/HIPHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/HIPHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..6fa8e1cd02db86b14ff315ec8daf8fed38097f31 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/HIPHooksInterface.h @@ -0,0 +1,72 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +// NB: Class must live in `at` due to limitations of Registry.h. +namespace at { + +// The HIPHooksInterface is an omnibus interface for any HIP functionality +// which we may want to call into from CPU code (and thus must be dynamically +// dispatched, to allow for separate compilation of HIP code). See +// CUDAHooksInterface for more detailed motivation. +struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + ~HIPHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library."); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library."); + } + + virtual bool hasHIP() const { + return false; + } + + virtual c10::DeviceIndex current_device() const { + return -1; + } + + bool isPinnedPtr(const void* /*data*/ ) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Pinned memory requires HIP."); + } + + virtual int getNumGPUs() const { + return 0; + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/ ) const override { + TORCH_CHECK(false, "Cannot check primary context without ATen_hip library."); + } +}; + +// NB: dummy argument to suppress "ISO C++11 requires at least one argument +// for the "..." in a variadic macro" +struct TORCH_API HIPHooksArgs {}; + +TORCH_DECLARE_REGISTRY(HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs); +#define REGISTER_HIP_HOOKS(clsname) \ + C10_REGISTER_CLASS(HIPHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const HIPHooksInterface& getHIPHooks(); + +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/HPUHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/HPUHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..77f69ffb6ac4cf4faeb5ad7f6890433459ea1235 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/HPUHooksInterface.h @@ -0,0 +1,62 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace at { + +struct TORCH_API HPUHooksInterface : AcceleratorHooksInterface { + ~HPUHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize HPU without HPU backend"); + } + + virtual bool hasHPU() const { + return false; + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK( + false, "Cannot get device of pointer on HPU without HPU backend"); + } + + bool isPinnedPtr(const void* /*data*/) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK( + false, + "You should register `HPUHooksInterface` for HPU before call `getPinnedMemoryAllocator`."); + } + + bool hasPrimaryContext( + [[maybe_unused]] DeviceIndex device_index) const override { + TORCH_CHECK( + false, + "You should register `HPUHooksInterface` for HPU before call `hasPrimaryContext`."); + } +}; + +struct TORCH_API HPUHooksArgs {}; + +TORCH_DECLARE_REGISTRY(HPUHooksRegistry, HPUHooksInterface, HPUHooksArgs); +#define REGISTER_HPU_HOOKS(clsname) \ + C10_REGISTER_CLASS(HPUHooksRegistry, clsname, clsname) + +namespace detail { + +TORCH_API const at::HPUHooksInterface& getHPUHooks(); + +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/IPUHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/IPUHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..1c8d81d3081519c9b55cbdf8b21b13535b2c4e05 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/IPUHooksInterface.h @@ -0,0 +1,48 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace at { + +struct TORCH_API IPUHooksInterface : AcceleratorHooksInterface { + ~IPUHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/) const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + return false; + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + } + + Generator getNewGenerator( + DeviceIndex /*device_index*/ = -1) const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + } +}; + +struct TORCH_API IPUHooksArgs {}; + +TORCH_DECLARE_REGISTRY(IPUHooksRegistry, IPUHooksInterface, IPUHooksArgs); +#define REGISTER_IPU_HOOKS(clsname) \ + C10_REGISTER_CLASS(IPUHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const IPUHooksInterface& getIPUHooks(); +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MAIAHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MAIAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..a214c0ef37db323cc536cebdcee3ccd6187ceebf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MAIAHooksInterface.h @@ -0,0 +1,47 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +// NB: Class must live in `at` due to limitations of Registry.h. +namespace at { + +struct TORCH_API MAIAHooksInterface : AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + ~MAIAHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library."); + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/) const override { + TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library."); + return false; + } + + virtual std::string showConfig() const { + TORCH_CHECK(false, "Cannot query detailed MAIA version information."); + } +}; + +// NB: dummy argument to suppress "ISO C++11 requires at least one argument +// for the "..." in a variadic macro" +struct TORCH_API MAIAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(MAIAHooksRegistry, MAIAHooksInterface, MAIAHooksArgs); +#define REGISTER_MAIA_HOOKS(clsname) \ + C10_REGISTER_CLASS(MAIAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const MAIAHooksInterface& getMAIAHooks(); +} // namespace detail + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MPSHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MPSHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..7ebeceb095ac3861f62777fbe11a299064233f07 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MPSHooksInterface.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright © 2022 Apple Inc. + +#pragma once + +#include + +#include +#include +#include + +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") +namespace at { + +struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface { + // this fails the implementation if MPSHooks functions are called, but + // MPS backend is not present. + #define FAIL_MPSHOOKS_FUNC(func) \ + TORCH_CHECK(false, "Cannot execute ", func, "() without MPS backend."); + + ~MPSHooksInterface() override = default; + + // Initialize the MPS library state + void init() const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual bool hasMPS() const { + return false; + } + virtual bool isOnMacOSorNewer(unsigned major = 13, unsigned minor = 0) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index) const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual Allocator* getMPSDeviceAllocator() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void deviceSynchronize() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void commitStream() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void* getCommandBuffer() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void* getDispatchQueue() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void emptyCache() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual size_t getCurrentAllocatedMemory() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual size_t getDriverAllocatedMemory() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual size_t getRecommendedMaxMemory() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void setMemoryFraction(double /*ratio*/) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void profilerStartTrace(const std::string& mode, bool waitUntilCompleted) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void profilerStopTrace() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual uint32_t acquireEvent(bool enable_timing) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + Device getDeviceFromPtr(void* data) const override { + TORCH_CHECK(false, "Cannot get device of pointer on MPS without ATen_mps library. "); + } + virtual void releaseEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void recordEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void waitForEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void synchronizeEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual bool queryEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual double elapsedTimeOfEvents(uint32_t start_event_id, uint32_t end_event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + bool hasPrimaryContext(DeviceIndex device_index) const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + bool isPinnedPtr(const void* data) const override { + return false; + } + Allocator* getPinnedMemoryAllocator() const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + #undef FAIL_MPSHOOKS_FUNC +}; + +struct TORCH_API MPSHooksArgs {}; + +TORCH_DECLARE_REGISTRY(MPSHooksRegistry, MPSHooksInterface, MPSHooksArgs); +#define REGISTER_MPS_HOOKS(clsname) \ + C10_REGISTER_CLASS(MPSHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const MPSHooksInterface& getMPSHooks(); + +} // namespace detail +} // namespace at +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MTIAHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MTIAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..830e9ac171602a7507b7b007a2c4059bbb57316c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/MTIAHooksInterface.h @@ -0,0 +1,218 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include +namespace at { +class Context; +} + +namespace at { +constexpr const char* MTIA_HELP = + "The MTIA backend requires MTIA extension for PyTorch;" + "this error has occurred because you are trying " + "to use some MTIA's functionality without MTIA extension included."; + +struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface { +// this fails the implementation if MTIAHooks functions are called, but +// MTIA backend is not present. +#define FAIL_MTIAHOOKS_FUNC(func) TORCH_CHECK(false, "Cannot execute ", func, "() without MTIA backend."); + + ~MTIAHooksInterface() override = default; + + void init() const override { + // Avoid logging here, since MTIA needs init devices first then it will know + // how many devices are available. Make it as no-op if mtia extension is not + // dynamically loaded. + return; + } + + virtual bool hasMTIA() const { + return false; + } + + DeviceIndex deviceCount() const override { + return 0; + } + + virtual void deviceSynchronize(c10::DeviceIndex /*device_index*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual std::string showConfig() const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/) const override { + return false; + } + + void setCurrentDevice(DeviceIndex /*device*/) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + DeviceIndex getCurrentDevice() const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + DeviceIndex exchangeDevice(DeviceIndex /*device*/) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + DeviceIndex maybeExchangeDevice(DeviceIndex /*device*/) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + virtual c10::Stream getCurrentStream(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return c10::Stream::unpack3(-1, 0, c10::DeviceType::MTIA); + } + + virtual int64_t getCurrentRawStream(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + virtual c10::Stream getDefaultStream(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return c10::Stream::unpack3(-1, 0, c10::DeviceType::MTIA); + } + + virtual void setCurrentStream(const c10::Stream& /*stream*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + bool isPinnedPtr(const void* /*data*/) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual PyObject* memoryStats(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual PyObject* getDeviceCapability(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual PyObject* getDeviceProperties(DeviceIndex device) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual void emptyCache() const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void recordMemoryHistory(const std::optional& /*enabled*/, + const std::string& /*stacks*/, + size_t /*max_entries*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual PyObject* memorySnapshot(const std::optional& local_path) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual DeviceIndex getDeviceCount() const { + FAIL_MTIAHOOKS_FUNC(__func__); + return 0; + } + + virtual void resetPeakMemoryStats(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void attachOutOfMemoryObserver(PyObject* observer) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return; + } + + virtual bool isAvailable() const override; + + /* MTIAGraph related APIs */ + virtual int64_t mtiagraphCreate(bool keep_graph = false) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + virtual void mtiagraphDestroy(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphCaptureBegin(int64_t handle, MempoolId_t pool) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphCaptureEnd(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphInstantiate(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphReplay(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphReset(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual MempoolId_t mtiagraphPool(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual MempoolId_t graphPoolHandle() const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual const Generator& getDefaultGenerator(DeviceIndex) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + static Generator dummy_generator; + return dummy_generator; + } + + virtual Generator getNewGenerator(DeviceIndex) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + static Generator dummy_generator; + return dummy_generator; + } +}; + +struct TORCH_API MTIAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(MTIAHooksRegistry, MTIAHooksInterface, MTIAHooksArgs); +#define REGISTER_MTIA_HOOKS(clsname) C10_REGISTER_CLASS(MTIAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const MTIAHooksInterface& getMTIAHooks(); +TORCH_API bool isMTIAHooksBuilt(); +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/PrivateUse1HooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/PrivateUse1HooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..3f0dcc93a0cdcaf1f0837eb9b79237eceec67016 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/PrivateUse1HooksInterface.h @@ -0,0 +1,94 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface { +#define FAIL_PRIVATEUSE1HOOKS_FUNC(func) \ + TORCH_CHECK_NOT_IMPLEMENTED( \ + false, \ + "You should register `PrivateUse1HooksInterface`", \ + "by `RegisterPrivateUse1HooksInterface` and implement `", \ + func, \ + "` at the same time for PrivateUse1."); + + ~PrivateUse1HooksInterface() override = default; + + bool isBuilt() const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + bool isAvailable() const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + const at::Generator& getDefaultGenerator( + c10::DeviceIndex device_index) const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + // TODO(FFFrog): Preserved for BC and will be removed in the future. + if (at::GetGeneratorPrivate().has_value()) + return at::GetGeneratorForPrivateuse1(device_index); + + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + at::Device getDeviceFromPtr(void* data) const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + bool isPinnedPtr(const void* data) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + void init() const override {} + virtual void resizePrivateUse1Bytes( + const c10::Storage& storage, + size_t newsize) const { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + +#undef FAIL_PRIVATEUSE1HOOKS_FUNC +}; + +struct TORCH_API PrivateUse1HooksArgs {}; + +TORCH_API void RegisterPrivateUse1HooksInterface( + at::PrivateUse1HooksInterface* hook_); + +TORCH_API bool isPrivateUse1HooksRegistered(); + +namespace detail { + +TORCH_API const at::PrivateUse1HooksInterface& getPrivateUse1Hooks(); + +} // namespace detail + +} // namespace at + +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/XLAHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/XLAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..35498352e43964c0280f4ab760ee86968a118e71 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/XLAHooksInterface.h @@ -0,0 +1,84 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +constexpr const char* XLA_HELP = + "This error has occurred because you are trying " + "to use some XLA functionality, but the XLA library has not been " + "loaded by the dynamic linker. You must load xla libraries by `import torch_xla`"; + +struct TORCH_API XLAHooksInterface : AcceleratorHooksInterface { + ~XLAHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize XLA without torch_xla library. ", XLA_HELP); + } + + virtual bool hasXLA() const { + return false; + } + + virtual std::string showConfig() const { + TORCH_CHECK( + false, + "Cannot query detailed XLA version without torch_xla library. ", + XLA_HELP); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, "Cannot get default XLA generator without torch_xla library. ", XLA_HELP); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot get XLA generator without torch_xla library. ", XLA_HELP); + } + + virtual DeviceIndex getCurrentDevice() const override { + TORCH_CHECK(false, "Cannot get current XLA device without torch_xla library. ", XLA_HELP); + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK(false, "Cannot get device of pointer on XLA without torch_xla library. ", XLA_HELP); + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Cannot get XLA pinned memory allocator without torch_xla library. ", XLA_HELP); + } + + bool isPinnedPtr(const void* data) const override { + return false; + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + TORCH_CHECK(false, "Cannot query primary context without torch_xla library. ", XLA_HELP); + } + +}; + +struct TORCH_API XLAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(XLAHooksRegistry, XLAHooksInterface, XLAHooksArgs); +#define REGISTER_XLA_HOOKS(clsname) \ + C10_REGISTER_CLASS(XLAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const XLAHooksInterface& getXLAHooks(); +} // namespace detail +} // namespace at +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/XPUHooksInterface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/XPUHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..51106c50a569568bab82336031e24d648cc31d27 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/detail/XPUHooksInterface.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{ + ~XPUHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize XPU without ATen_xpu library."); + } + + virtual bool hasXPU() const { + return false; + } + + virtual std::string showConfig() const { + TORCH_CHECK( + false, + "Cannot query detailed XPU version without ATen_xpu library."); + } + + virtual int32_t getGlobalIdxFromDevice(const Device& device) const { + TORCH_CHECK(false, "Cannot get XPU global device index without ATen_xpu library."); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, "Cannot get default XPU generator without ATen_xpu library."); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library."); + } + + virtual DeviceIndex getNumGPUs() const { + return 0; + } + + virtual DeviceIndex current_device() const { + TORCH_CHECK(false, "Cannot get current device on XPU without ATen_xpu library."); + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK(false, "Cannot get device of pointer on XPU without ATen_xpu library."); + } + + virtual void deviceSynchronize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot synchronize XPU device without ATen_xpu library."); + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Cannot get XPU pinned memory allocator without ATen_xpu library."); + } + + bool isPinnedPtr(const void* data) const override { + return false; + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + TORCH_CHECK(false, "Cannot query primary context without ATen_xpu library."); + } +}; + +struct TORCH_API XPUHooksArgs {}; + +TORCH_DECLARE_REGISTRY(XPUHooksRegistry, XPUHooksInterface, XPUHooksArgs); +#define REGISTER_XPU_HOOKS(clsname) \ + C10_REGISTER_CLASS(XPUHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const XPUHooksInterface& getXPUHooks(); +} // namespace detail +} // namespace at +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/metal/Context.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/metal/Context.h new file mode 100644 index 0000000000000000000000000000000000000000..c36e999b1e107a1f2ea1e4538eed7c0a9da16564 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/metal/Context.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#ifndef MetalContext_h +#define MetalContext_h + +#include + +#include + +namespace at::metal { + +struct MetalInterface { + virtual ~MetalInterface() = default; + virtual bool is_metal_available() const = 0; + virtual at::Tensor& metal_copy_(at::Tensor& self, const at::Tensor& src) + const = 0; +}; + +extern std::atomic g_metal_impl_registry; + +class MetalImplRegistrar { + public: + explicit MetalImplRegistrar(MetalInterface* /*impl*/); +}; + +at::Tensor& metal_copy_(at::Tensor& self, const at::Tensor& src); + +} // namespace at::metal + +namespace at::native { +bool is_metal_available(); +} // namespace at::native + +#endif /* MetalContext_h */ + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3c0b1ecb50fa28ca914c34f38070098ca14c3199 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _adaptive_avg_pool2d(const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API at::Tensor _adaptive_avg_pool2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_add_relu.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_add_relu.h new file mode 100644 index 0000000000000000000000000000000000000000..49a169367ec1071385aaf583c9665c83cc0324b7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_add_relu.h @@ -0,0 +1,69 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor _add_relu(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_Tensor::call(self, other, alpha); +} + +// aten::_add_relu_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & _add_relu_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu__Tensor::call(self, other, alpha); +} + +// aten::_add_relu.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _add_relu_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_out::call(self, other, alpha, out); +} +// aten::_add_relu.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _add_relu_outf(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::_add_relu_out::call(self, other, alpha, out); +} + +// aten::_add_relu.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor _add_relu(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_Scalar::call(self, other, alpha); +} + +// aten::_add_relu_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & _add_relu_(at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu__Scalar::call(self, other, alpha); +} + +// aten::_add_relu.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _add_relu_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_Scalar_out::call(self, other, alpha, out); +} +// aten::_add_relu.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _add_relu_outf(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::_add_relu_Scalar_out::call(self, other, alpha, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_aminmax_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_aminmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6369e658b18c0d1569107886a160fc3460d4fd6b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_aminmax_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _aminmax_out(const at::Tensor & self, at::Tensor & out0, at::Tensor & out1); +TORCH_API ::std::tuple _aminmax_all(const at::Tensor & self); +TORCH_API ::std::tuple _aminmax_dim_out(const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & out0, at::Tensor & out1); +TORCH_API ::std::tuple _aminmax(const at::Tensor & self, int64_t dim, bool keepdim=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_amp_update_scale_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_amp_update_scale_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f51763bc289b0bce1a4ff433f390aaf009382b39 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_amp_update_scale_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor & _amp_update_scale_(at::Tensor & self, at::Tensor & growth_tracker, const at::Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_amp_update_scale_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_amp_update_scale_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..16d074205aeb0f2c773a2327e8eb0844e40c2fb0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_amp_update_scale_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & _amp_update_scale_(at::Tensor & self, at::Tensor & growth_tracker, const at::Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e0bddc3ddebc765890a264fabce60e80dd704f8d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _backward(const at::Tensor & self, at::TensorList inputs, const ::std::optional & gradient={}, ::std::optional retain_graph=::std::nullopt, bool create_graph=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_batch_norm_impl_index_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_batch_norm_impl_index_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..bad0025d2751eb90ca51aca5fddddfc7fa7a5d95 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_batch_norm_impl_index_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _batch_norm_impl_index_backward(int64_t impl_index, const at::Tensor & input, const at::Tensor & grad_output, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var_transform, bool train, double eps, ::std::array output_mask, const at::Tensor & reservedSpace); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Double_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Double_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f51b5beca2abe8a6b0f7d4c74c2833debd153ece --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Double_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _cast_Double { + using schema = at::Tensor (const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_cast_Double"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_cast_Double(Tensor self, bool non_blocking=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, bool non_blocking); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool non_blocking); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Float_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Float_native.h new file mode 100644 index 0000000000000000000000000000000000000000..29af731461004b38bcf7acf40ba1000d313a879e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Float_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _cast_Float(const at::Tensor & self, bool non_blocking=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Int.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Int.h new file mode 100644 index 0000000000000000000000000000000000000000..b1dd26435a3d5b961a14d0d49001bcb7736240d7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Int.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_cast_Int(Tensor self, bool non_blocking=False) -> Tensor +inline at::Tensor _cast_Int(const at::Tensor & self, bool non_blocking=false) { + return at::_ops::_cast_Int::call(self, non_blocking); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Short_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Short_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..07a269cdfc4d369f54d801ba2ac036261e2b62da --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Short_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _cast_Short(const at::Tensor & self, bool non_blocking=false); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cdist_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cdist_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3f8fd24fed879dba4c52c5e952bddcc1f85b39ca --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cdist_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _cdist_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, double, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_cdist_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & x1, const at::Tensor & x2, double p, const at::Tensor & cdist); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & x1, const at::Tensor & x2, double p, const at::Tensor & cdist); +}; + +struct TORCH_API _cdist_backward_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, double, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_cdist_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_cdist_backward.out(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad, const at::Tensor & x1, const at::Tensor & x2, double p, const at::Tensor & cdist, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & x1, const at::Tensor & x2, double p, const at::Tensor & cdist, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..e13feb2ca7d02c0d54a2a7d4c94a4a579809a4a8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int) +inline ::std::tuple _choose_qparams_per_tensor(const at::Tensor & self, bool reduce_range=false) { + return at::_ops::_choose_qparams_per_tensor::call(self, reduce_range); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..56e6aff883e93d23d8b740c2f0b5d77ffdbb795b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple _choose_qparams_per_tensor(const at::Tensor & self, bool reduce_range=false); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..46a8ede799981e1f40f3894a3f120b7e85cce1fe --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _conj_physical(const at::Tensor & self); +TORCH_API at::Tensor & _conj_physical_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & _conj_physical_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3ddeb28a2a4281175cdb7afc3bee589b9d87c947 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _conj_physical { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_conj_physical"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_conj_physical(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _conj_physical_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_conj_physical"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo.h new file mode 100644 index 0000000000000000000000000000000000000000..b45d0a750f39873fbd1c071d659430eec43726fc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor +inline at::Tensor _convert_indices_from_csr_to_coo(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32=false, bool transpose=false) { + return at::_ops::_convert_indices_from_csr_to_coo::call(crow_indices, col_indices, out_int32, transpose); +} + +// aten::_convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _convert_indices_from_csr_to_coo_out(at::Tensor & out, const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32=false, bool transpose=false) { + return at::_ops::_convert_indices_from_csr_to_coo_out::call(crow_indices, col_indices, out_int32, transpose, out); +} +// aten::_convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _convert_indices_from_csr_to_coo_outf(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose, at::Tensor & out) { + return at::_ops::_convert_indices_from_csr_to_coo_out::call(crow_indices, col_indices, out_int32, transpose, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ca32d3cab9f0ab688edbf8d5e90f33d70ed2104c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured__convert_indices_from_csr_to_coo_structured_cpu : public at::meta::structured__convert_indices_from_csr_to_coo { +void impl(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose, const at::Tensor & out); +}; +struct TORCH_API structured__convert_indices_from_csr_to_coo_structured_cuda : public at::meta::structured__convert_indices_from_csr_to_coo { +void impl(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3f2903a5276fe4aec51496dd8c296612ad0b4a8f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _convert_indices_from_csr_to_coo { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_convert_indices_from_csr_to_coo"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor"; + static at::Tensor call(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose); +}; + +struct TORCH_API _convert_indices_from_csr_to_coo_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, bool, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_convert_indices_from_csr_to_coo"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_weight_to_int4pack.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_weight_to_int4pack.h new file mode 100644 index 0000000000000000000000000000000000000000..e99a52d3fc5bae9f3bd590d4a750e5c5d4a66b32 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_weight_to_int4pack.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor +inline at::Tensor _convert_weight_to_int4pack(const at::Tensor & self, int64_t innerKTiles) { + return at::_ops::_convert_weight_to_int4pack::call(self, innerKTiles); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convolution_mode_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convolution_mode_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..37d2036c540b5f6ec8b5552b680b155095202c84 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_convolution_mode_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _convolution_mode(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef stride, c10::string_view padding, at::IntArrayRef dilation, int64_t groups); +TORCH_API at::Tensor _convolution_mode_symint(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_copy_from.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_copy_from.h new file mode 100644 index 0000000000000000000000000000000000000000..5e7ad78d1909d9d9a244f41ec60b969be0200068 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_copy_from.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor +inline at::Tensor _copy_from(const at::Tensor & self, const at::Tensor & dst, bool non_blocking=false) { + return at::_ops::_copy_from::call(self, dst, non_blocking); +} + +// aten::_copy_from.out(Tensor self, Tensor dst, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _copy_from_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & dst, bool non_blocking=false) { + return at::_ops::_copy_from_out::call(self, dst, non_blocking, out); +} +// aten::_copy_from.out(Tensor self, Tensor dst, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _copy_from_outf(const at::Tensor & self, const at::Tensor & dst, bool non_blocking, at::Tensor & out) { + return at::_ops::_copy_from_out::call(self, dst, non_blocking, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cslt_compress_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cslt_compress_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0e9995875704625993ccfc7049f9922aa177413a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cslt_compress_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _cslt_compress(const at::Tensor & input); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_native.h new file mode 100644 index 0000000000000000000000000000000000000000..710bb07a3fdc9d3d6f4cea4ef02af89b9ae67ab2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _cudnn_init_dropout_state_out(double dropout, bool train, int64_t dropout_seed, at::Tensor & out); +TORCH_API at::Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_rnn_flatten_weight_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_rnn_flatten_weight_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3621007d9fc69a52c88df8177fba5447a59082d2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_rnn_flatten_weight_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _cudnn_rnn_flatten_weight_out(at::Tensor & out, at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional); +TORCH_API at::Tensor & _cudnn_rnn_flatten_weight_outf(at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional, at::Tensor & out); +TORCH_API at::Tensor & _cudnn_rnn_flatten_weight_symint_out(at::Tensor & out, at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional); +TORCH_API at::Tensor & _cudnn_rnn_flatten_weight_symint_outf(at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_max_size_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_max_size_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..76bab16b1c9e653e9a64ef5b573233e41f52988a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_max_size_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API int64_t _cufft_get_plan_cache_max_size(at::DeviceIndex device_index); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_size.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_size.h new file mode 100644 index 0000000000000000000000000000000000000000..aa988b125eb6561e593eb3dd08c68eae48a18d52 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_size.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_cufft_get_plan_cache_size(DeviceIndex device_index) -> int +inline int64_t _cufft_get_plan_cache_size(at::DeviceIndex device_index) { + return at::_ops::_cufft_get_plan_cache_size::call(device_index); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dirichlet_grad.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dirichlet_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..79e94a580a96c18badb99633b0b740caec451b2f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dirichlet_grad.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_dirichlet_grad(Tensor x, Tensor alpha, Tensor total) -> Tensor +inline at::Tensor _dirichlet_grad(const at::Tensor & x, const at::Tensor & alpha, const at::Tensor & total) { + return at::_ops::_dirichlet_grad::call(x, alpha, total); +} + +// aten::_dirichlet_grad.out(Tensor x, Tensor alpha, Tensor total, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _dirichlet_grad_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & alpha, const at::Tensor & total) { + return at::_ops::_dirichlet_grad_out::call(x, alpha, total, out); +} +// aten::_dirichlet_grad.out(Tensor x, Tensor alpha, Tensor total, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _dirichlet_grad_outf(const at::Tensor & x, const at::Tensor & alpha, const at::Tensor & total, at::Tensor & out) { + return at::_ops::_dirichlet_grad_out::call(x, alpha, total, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6b0077774af5d0af67b9ecc5e40c335d1412002b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _dyn_quant_matmul_4bit(const at::Tensor & inp, const at::Tensor & packed_weights, int64_t block_size, int64_t in_features, int64_t out_features); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..16588a6610e05dd3af70815b54a4fefe54ad0c26 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _dyn_quant_pack_4bit_weight(const at::Tensor & weights, const at::Tensor & scales_zeros, const ::std::optional & bias, int64_t block_size, int64_t in_features, int64_t out_features); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_efficient_attention_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_efficient_attention_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..74b3293e8b507c96e7b0598f0910ae073beaa1fa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_efficient_attention_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _efficient_attention_backward(const at::Tensor & grad_out_, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const ::std::optional & bias, const at::Tensor & out, const ::std::optional & cu_seqlens_q, const ::std::optional & cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, const at::Tensor & logsumexp, double dropout_p, const at::Tensor & philox_seed, const at::Tensor & philox_offset, int64_t custom_mask_type, bool bias_requires_grad, ::std::optional scale=::std::nullopt, ::std::optional num_splits_key=::std::nullopt, ::std::optional window_size=::std::nullopt, bool shared_storage_dqdkdv=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_efficientzerotensor_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_efficientzerotensor_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..58fad5b1b2ff28a9c997fee57272d9ca89258453 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_efficientzerotensor_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _efficientzerotensor_out(at::Tensor & out, at::IntArrayRef size); +TORCH_API at::Tensor & _efficientzerotensor_outf(at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor & _efficientzerotensor_symint_out(at::Tensor & out, c10::SymIntArrayRef size); +TORCH_API at::Tensor & _efficientzerotensor_symint_outf(c10::SymIntArrayRef size, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9544335046d776821fcdee82c30e04c3b8db08ab --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _embedding_bag_backward(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor _embedding_bag_backward_symint(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3a3273c8d101d64f7006ad88ccd25aa7b6eada05 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _embedding_bag_backward_symint(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..080dfad3fa465a9219a097b99b448859f611f13f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _embedding_bag_dense_backward_out(at::Tensor & out, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor & _embedding_bag_dense_backward_outf(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); +TORCH_API at::Tensor & _embedding_bag_dense_backward_symint_out(at::Tensor & out, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor & _embedding_bag_dense_backward_symint_outf(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..aefb66ab79c3e91cf8c14fa6fb89b9fe0e53131a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _embedding_bag_dense_backward(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor _embedding_bag_dense_backward_symint(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..04099d7303e4a27d490cd64a1293de6175c3b334 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _embedding_bag_dense_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymInt, bool, int64_t, const ::std::optional &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_embedding_bag_dense_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx); +}; + +struct TORCH_API _embedding_bag_dense_backward_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymInt, bool, int64_t, const ::std::optional &, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_embedding_bag_dense_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_embedding_bag_dense_backward.out(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_per_sample_weights_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_per_sample_weights_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..622c3bcd1187844e91caf7553a7d459be771c2e4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_per_sample_weights_backward_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _embedding_bag_per_sample_weights_backward(const at::Tensor & grad, const at::Tensor & weight, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, int64_t mode, int64_t padding_idx=-1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_per_sample_weights_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_per_sample_weights_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..28d039d2f58afd9abb40095aef9b7a33695712dc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_per_sample_weights_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _embedding_bag_per_sample_weights_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_embedding_bag_per_sample_weights_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_embedding_bag_per_sample_weights_backward(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode, int padding_idx=-1) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & weight, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, int64_t mode, int64_t padding_idx); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & weight, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, int64_t mode, int64_t padding_idx); +}; + +struct TORCH_API _embedding_bag_per_sample_weights_backward_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_embedding_bag_per_sample_weights_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_embedding_bag_per_sample_weights_backward.out(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode, int padding_idx=-1, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad, const at::Tensor & weight, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, int64_t mode, int64_t padding_idx, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & weight, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, int64_t mode, int64_t padding_idx, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_empty_per_channel_affine_quantized.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_empty_per_channel_affine_quantized.h new file mode 100644 index 0000000000000000000000000000000000000000..467fa5350ada685f02f79b42ec92f6af625abf78 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_empty_per_channel_affine_quantized.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor +inline at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::TensorOptions options={}, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); +} +namespace symint { + template >> + at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::TensorOptions options={}, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); + } +} + +// aten::_empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor +inline at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format) { + return at::_ops::_empty_per_channel_affine_quantized::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, dtype, layout, device, pin_memory, memory_format); +} +namespace symint { + template >> + at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format) { + return at::_ops::_empty_per_channel_affine_quantized::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, dtype, layout, device, pin_memory, memory_format); + } +} + +// aten::_empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor +inline at::Tensor _empty_per_channel_affine_quantized_symint(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::TensorOptions options={}, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized::call(size, scales, zero_points, axis, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); +} +namespace symint { + template >> + at::Tensor _empty_per_channel_affine_quantized(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::TensorOptions options={}, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized::call(size, scales, zero_points, axis, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); + } +} + +// aten::_empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor +inline at::Tensor _empty_per_channel_affine_quantized_symint(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format) { + return at::_ops::_empty_per_channel_affine_quantized::call(size, scales, zero_points, axis, dtype, layout, device, pin_memory, memory_format); +} +namespace symint { + template >> + at::Tensor _empty_per_channel_affine_quantized(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format) { + return at::_ops::_empty_per_channel_affine_quantized::call(size, scales, zero_points, axis, dtype, layout, device, pin_memory, memory_format); + } +} + +// aten::_empty_per_channel_affine_quantized.out(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _empty_per_channel_affine_quantized_out(at::Tensor & out, at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, memory_format, out); +} +namespace symint { + template >> + at::Tensor & _empty_per_channel_affine_quantized_out(at::Tensor & out, at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, memory_format, out); + } +} + +// aten::_empty_per_channel_affine_quantized.out(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _empty_per_channel_affine_quantized_outf(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format, at::Tensor & out) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, memory_format, out); +} +namespace symint { + template >> + at::Tensor & _empty_per_channel_affine_quantized_outf(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format, at::Tensor & out) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(c10::fromIntArrayRefSlow(size), scales, zero_points, axis, memory_format, out); + } +} + +// aten::_empty_per_channel_affine_quantized.out(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _empty_per_channel_affine_quantized_symint_out(at::Tensor & out, c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(size, scales, zero_points, axis, memory_format, out); +} +namespace symint { + template >> + at::Tensor & _empty_per_channel_affine_quantized_out(at::Tensor & out, c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format=c10::MemoryFormat::Contiguous) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(size, scales, zero_points, axis, memory_format, out); + } +} + +// aten::_empty_per_channel_affine_quantized.out(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _empty_per_channel_affine_quantized_symint_outf(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format, at::Tensor & out) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(size, scales, zero_points, axis, memory_format, out); +} +namespace symint { + template >> + at::Tensor & _empty_per_channel_affine_quantized_outf(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, ::std::optional memory_format, at::Tensor & out) { + return at::_ops::_empty_per_channel_affine_quantized_out::call(size, scales, zero_points, axis, memory_format, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_euclidean_dist_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_euclidean_dist_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..251d460ef836cc36a1f27d3ab695f29c851f1071 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_euclidean_dist_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _euclidean_dist { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_euclidean_dist"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_euclidean_dist(Tensor x1, Tensor x2) -> Tensor"; + static at::Tensor call(const at::Tensor & x1, const at::Tensor & x2); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x1, const at::Tensor & x2); +}; + +struct TORCH_API _euclidean_dist_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_euclidean_dist"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_euclidean_dist.out(Tensor x1, Tensor x2, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & x1, const at::Tensor & x2, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x1, const at::Tensor & x2, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..03064ade4a22ffb017ac37ab696dceeab6f0aa57 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_backward.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_fake_quantize_learnable_per_channel_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> (Tensor, Tensor, Tensor) +inline ::std::tuple _fake_quantize_learnable_per_channel_affine_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor=1.0) { + return at::_ops::_fake_quantize_learnable_per_channel_affine_backward::call(grad, self, scale, zero_point, axis, quant_min, quant_max, grad_factor); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..37f9e9c3fa100ea0294769939db57e6f35fee8c8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _fake_quantize_learnable_per_channel_affine(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor=1.0); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..dec0cb882832bae79669a73d6b2c98556f25ddcd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _fake_quantize_learnable_per_channel_affine { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t, int64_t, double); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fake_quantize_learnable_per_channel_affine"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_fake_quantize_learnable_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); +}; + +struct TORCH_API _fake_quantize_learnable_per_channel_affine_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t, int64_t, double, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fake_quantize_learnable_per_channel_affine"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_fake_quantize_learnable_per_channel_affine.out(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..338726346da75a6e6a425393958370614bf9e7a7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_backward_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _fake_quantize_learnable_per_tensor_affine_backward(const at::Tensor & grad, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor=1.0); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fill_mem_eff_dropout_mask_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fill_mem_eff_dropout_mask_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..19b6a03275f8434753af7f2bf6c6bfe12ec32fb6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fill_mem_eff_dropout_mask_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor & _fill_mem_eff_dropout_mask_(at::Tensor & self, double dropout_p, int64_t seed, int64_t offset); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_abs_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_abs_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6d2b7cbc16108af2caae7873de47b64803a8bb47 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_abs_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_abs { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_abs"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_abs(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_abs_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_abs_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_abs_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_abs_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_abs"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_abs.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_acos_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_acos_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9e6ead7afc2d66735163b101bd2d0cb9edae0fc6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_acos_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::vector _foreach_acos(at::TensorList self); +TORCH_API void _foreach_acos_(at::TensorList self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_addcmul_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_addcmul_native.h new file mode 100644 index 0000000000000000000000000000000000000000..28971f0626c3da2d935416ba0a04723e0391f1c3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_addcmul_native.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_addcmul_scalar_slow(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API void _foreach_addcmul_Scalar_out(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value, at::TensorList out); +TORCH_API void foreach_tensor_addcmul_scalar_slow_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API ::std::vector foreach_tensor_addcmul_scalar_cuda(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API void foreach_tensor_addcmul_scalar_cuda_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API ::std::vector foreach_tensor_addcmul_scalarlist_slow(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API void _foreach_addcmul_ScalarList_out(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars, at::TensorList out); +TORCH_API void foreach_tensor_addcmul_scalarlist_slow_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API ::std::vector foreach_tensor_addcmul_scalarlist_cuda(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API void foreach_tensor_addcmul_scalarlist_cuda_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API ::std::vector foreach_tensor_addcmul_tensor_slow(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +TORCH_API void _foreach_addcmul_Tensor_out(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars, at::TensorList out); +TORCH_API void foreach_tensor_addcmul_tensor_slow_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +TORCH_API ::std::vector foreach_tensor_addcmul_tensor_cuda(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +TORCH_API void foreach_tensor_addcmul_tensor_cuda_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_ceil_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_ceil_native.h new file mode 100644 index 0000000000000000000000000000000000000000..835d543f0b326f4b6e43bbeb8268d400da1b2b17 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_ceil_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_ceil_slow(at::TensorList self); +TORCH_API void _foreach_ceil_out(at::TensorList self, at::TensorList out); +TORCH_API void foreach_tensor_ceil_slow_(at::TensorList self); +TORCH_API ::std::vector foreach_tensor_ceil_cuda(at::TensorList self); +TORCH_API void foreach_tensor_ceil_cuda_(at::TensorList self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_clamp_min_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_clamp_min_native.h new file mode 100644 index 0000000000000000000000000000000000000000..0b793c28aa47248db77eea0d3525541824021d54 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_clamp_min_native.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_clamp_min_scalar_kernel_slow(at::TensorList self, const at::Scalar & scalar); +TORCH_API void _foreach_clamp_min_Scalar_out(at::TensorList self, const at::Scalar & scalar, at::TensorList out); +TORCH_API void foreach_tensor_clamp_min_scalar_kernel_slow_(at::TensorList self, const at::Scalar & scalar); +TORCH_API ::std::vector foreach_tensor_clamp_min_scalar_kernel_cuda(at::TensorList self, const at::Scalar & scalar); +TORCH_API void foreach_tensor_clamp_min_scalar_kernel_cuda_(at::TensorList self, const at::Scalar & scalar); +TORCH_API ::std::vector foreach_tensor_clamp_min_list_kernel_slow(at::TensorList self, at::TensorList other); +TORCH_API void _foreach_clamp_min_List_out(at::TensorList self, at::TensorList other, at::TensorList out); +TORCH_API void foreach_tensor_clamp_min_list_kernel_slow_(at::TensorList self, at::TensorList other); +TORCH_API ::std::vector foreach_tensor_clamp_min_list_kernel_cuda(at::TensorList self, at::TensorList other); +TORCH_API void foreach_tensor_clamp_min_list_kernel_cuda_(at::TensorList self, at::TensorList other); +TORCH_API ::std::vector foreach_tensor_clamp_min_scalarlist_kernel_slow(at::TensorList self, at::ArrayRef scalars); +TORCH_API void _foreach_clamp_min_ScalarList_out(at::TensorList self, at::ArrayRef scalars, at::TensorList out); +TORCH_API void foreach_tensor_clamp_min_scalarlist_kernel_slow_(at::TensorList self, at::ArrayRef scalars); +TORCH_API ::std::vector foreach_tensor_clamp_min_scalarlist_kernel_cuda(at::TensorList self, at::ArrayRef scalars); +TORCH_API void foreach_tensor_clamp_min_scalarlist_kernel_cuda_(at::TensorList self, at::ArrayRef scalars); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos.h new file mode 100644 index 0000000000000000000000000000000000000000..2aff126d2993baf12d8d5b32c66b18fb92b5de0e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_foreach_cos(Tensor[] self) -> Tensor[] +inline ::std::vector _foreach_cos(at::TensorList self) { + return at::_ops::_foreach_cos::call(self); +} + +// aten::_foreach_cos_(Tensor(a!)[] self) -> () +inline void _foreach_cos_(at::TensorList self) { + return at::_ops::_foreach_cos_::call(self); +} + +// aten::_foreach_cos.out(Tensor[] self, *, Tensor(a!)[] out) -> () +inline void _foreach_cos_out(at::TensorList out, at::TensorList self) { + return at::_ops::_foreach_cos_out::call(self, out); +} +// aten::_foreach_cos.out(Tensor[] self, *, Tensor(a!)[] out) -> () +inline void _foreach_cos_outf(at::TensorList self, at::TensorList out) { + return at::_ops::_foreach_cos_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d086912353c24932707d866ac0fac91f0d66e74c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_cos_slow(at::TensorList self); +TORCH_API void _foreach_cos_out(at::TensorList self, at::TensorList out); +TORCH_API void foreach_tensor_cos_slow_(at::TensorList self); +TORCH_API ::std::vector foreach_tensor_cos_cuda(at::TensorList self); +TORCH_API void foreach_tensor_cos_cuda_(at::TensorList self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cosh_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cosh_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8d1f62c494da42c617fad4248d37e9d3d20deaa9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cosh_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector _foreach_cosh(at::TensorList self); +TORCH_API void _foreach_cosh_out(at::TensorList out, at::TensorList self); +TORCH_API void _foreach_cosh_outf(at::TensorList self, at::TensorList out); +TORCH_API void _foreach_cosh_(at::TensorList self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_erf.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_erf.h new file mode 100644 index 0000000000000000000000000000000000000000..ea8ec05c4e2fe592c98deac3f5aa08189da2c5e4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_erf.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_foreach_erf(Tensor[] self) -> Tensor[] +inline ::std::vector _foreach_erf(at::TensorList self) { + return at::_ops::_foreach_erf::call(self); +} + +// aten::_foreach_erf_(Tensor(a!)[] self) -> () +inline void _foreach_erf_(at::TensorList self) { + return at::_ops::_foreach_erf_::call(self); +} + +// aten::_foreach_erf.out(Tensor[] self, *, Tensor(a!)[] out) -> () +inline void _foreach_erf_out(at::TensorList out, at::TensorList self) { + return at::_ops::_foreach_erf_out::call(self, out); +} +// aten::_foreach_erf.out(Tensor[] self, *, Tensor(a!)[] out) -> () +inline void _foreach_erf_outf(at::TensorList self, at::TensorList out) { + return at::_ops::_foreach_erf_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_erfc_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_erfc_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a095dfe6f8dbf93c1390b37474c53ea261263ee9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_erfc_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::vector _foreach_erfc(at::TensorList self); +TORCH_API void _foreach_erfc_(at::TensorList self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_exp_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_exp_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b0f77a02fb6dc420fcf08339c70d3b047cde0cad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_exp_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector _foreach_exp(at::TensorList self); +TORCH_API void _foreach_exp_out(at::TensorList out, at::TensorList self); +TORCH_API void _foreach_exp_outf(at::TensorList self, at::TensorList out); +TORCH_API void _foreach_exp_(at::TensorList self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_expm1_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_expm1_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..449a5f3d324481e3eed65d4e66888977a4fe98f0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_expm1_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector _foreach_expm1(at::TensorList self); +TORCH_API void _foreach_expm1_out(at::TensorList out, at::TensorList self); +TORCH_API void _foreach_expm1_outf(at::TensorList self, at::TensorList out); +TORCH_API void _foreach_expm1_(at::TensorList self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_floor_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_floor_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c0be7ba11f7a784331e7c586ec036b8b3c72210b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_floor_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::vector _foreach_floor(at::TensorList self); +TORCH_API void _foreach_floor_(at::TensorList self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log10_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log10_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..467f1210115b746f14e24b154e601e0708bda7e3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log10_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_log10 { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_log10"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_log10(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_log10_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_log10_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_log10_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_log10_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_log10"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_log10.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log1p_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log1p_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..79fc0745674bf030b0ca713709d0641e12f804e8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log1p_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector _foreach_log1p(at::TensorList self); +TORCH_API void _foreach_log1p_out(at::TensorList out, at::TensorList self); +TORCH_API void _foreach_log1p_outf(at::TensorList self, at::TensorList out); +TORCH_API void _foreach_log1p_(at::TensorList self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_neg_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_neg_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d37826985accd96074b2e3152269f815af408f94 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_neg_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_neg { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_neg"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_neg(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_neg_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_neg_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_neg_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_neg_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_neg"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_neg.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_norm_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_norm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0d415c565c93374ee145a9b5636019a5e48dcb1e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_norm_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::vector _foreach_norm(at::TensorList self, const at::Scalar & ord=2, ::std::optional dtype=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_pow_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_pow_native.h new file mode 100644 index 0000000000000000000000000000000000000000..71de7b8363eea06a9aecafe776d42296984a9492 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_pow_native.h @@ -0,0 +1,42 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_pow_list_kernel_slow(at::TensorList self, at::TensorList exponent); +TORCH_API void _foreach_pow_List_out(at::TensorList self, at::TensorList exponent, at::TensorList out); +TORCH_API void foreach_tensor_pow_list_kernel_slow_(at::TensorList self, at::TensorList exponent); +TORCH_API ::std::vector foreach_tensor_pow_list_kernel_cuda(at::TensorList self, at::TensorList exponent); +TORCH_API void foreach_tensor_pow_list_kernel_cuda_(at::TensorList self, at::TensorList exponent); +TORCH_API ::std::vector foreach_tensor_pow_scalar_kernel_slow(at::TensorList self, const at::Scalar & exponent); +TORCH_API void _foreach_pow_Scalar_out(at::TensorList self, const at::Scalar & exponent, at::TensorList out); +TORCH_API void foreach_tensor_pow_scalar_kernel_slow_(at::TensorList self, const at::Scalar & exponent); +TORCH_API ::std::vector foreach_tensor_pow_scalar_kernel_cuda(at::TensorList self, const at::Scalar & exponent); +TORCH_API void foreach_tensor_pow_scalar_kernel_cuda_(at::TensorList self, const at::Scalar & exponent); +TORCH_API ::std::vector foreach_tensor_pow_scalarlist_kernel_slow(at::TensorList self, at::ArrayRef exponent); +TORCH_API void _foreach_pow_ScalarList_out(at::TensorList self, at::ArrayRef exponent, at::TensorList out); +TORCH_API void foreach_tensor_pow_scalarlist_kernel_slow_(at::TensorList self, at::ArrayRef exponent); +TORCH_API ::std::vector foreach_tensor_pow_scalarlist_kernel_cuda(at::TensorList self, at::ArrayRef exponent); +TORCH_API void foreach_tensor_pow_scalarlist_kernel_cuda_(at::TensorList self, at::ArrayRef exponent); +TORCH_API ::std::vector foreach_scalar_pow_list_kernel_slow(const at::Scalar & self, at::TensorList exponent); +TORCH_API ::std::vector foreach_scalar_pow_list_kernel_cuda(const at::Scalar & self, at::TensorList exponent); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sigmoid_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sigmoid_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..58b3498ddcd7b476c2ee3732c7d72ed2dc60dad0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sigmoid_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_sigmoid { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_sigmoid"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_sigmoid(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_sigmoid_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_sigmoid_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_sigmoid_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_sigmoid_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_sigmoid"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_sigmoid.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sign_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sign_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b1e28df9f18f28096eccfeb64cb88b71e10bb6c4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sign_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_sign_slow(at::TensorList self); +TORCH_API void _foreach_sign_out(at::TensorList self, at::TensorList out); +TORCH_API void foreach_tensor_sign_slow_(at::TensorList self); +TORCH_API ::std::vector foreach_tensor_sign_cuda(at::TensorList self); +TORCH_API void foreach_tensor_sign_cuda_(at::TensorList self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sinh_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sinh_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..58bbccfa35dd30b010320732a49af6284d3dba34 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sinh_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector _foreach_sinh(at::TensorList self); +TORCH_API void _foreach_sinh_out(at::TensorList out, at::TensorList self); +TORCH_API void _foreach_sinh_outf(at::TensorList self, at::TensorList out); +TORCH_API void _foreach_sinh_(at::TensorList self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tan_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tan_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2c0ffd1f66bb56be934f0a4e394690da6d17d90b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tan_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector _foreach_tan(at::TensorList self); +TORCH_API void _foreach_tan_out(at::TensorList out, at::TensorList self); +TORCH_API void _foreach_tan_outf(at::TensorList self, at::TensorList out); +TORCH_API void _foreach_tan_(at::TensorList self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tanh_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tanh_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..02930b3ab45a704a44825a4ac0f2993f45075010 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tanh_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::vector _foreach_tanh(at::TensorList self); +TORCH_API void _foreach_tanh_(at::TensorList self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_functional_assert_async_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_functional_assert_async_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ad4e53e4898b97553362eef909aa89ce236a34ce --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_functional_assert_async_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _functional_assert_async_msg { + using schema = at::Tensor (const at::Tensor &, c10::string_view, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_functional_assert_async"; + static constexpr const char* overload_name = "msg"; + static constexpr const char* schema_str = "_functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::string_view assert_msg, const at::Tensor & dep_token); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view assert_msg, const at::Tensor & dep_token); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_adam_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_adam_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3a01e28aa7af0ae10445007c5c86f0a143e57483 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_adam_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adam(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_adam_out(at::TensorList out, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_adam_outf(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); +TORCH_API ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adam(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_adam_out(at::TensorList out, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_adam_outf(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_adam_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_adam_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5fe4c1d6e5190bbde2dd4e683c2286631c99ea99 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_adam_ops.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _fused_adam_ { + using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, double, double, double, double, double, bool, bool, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fused_adam_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_fused_adam_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()"; + static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); +}; + +struct TORCH_API _fused_adam__tensor_lr { + using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, const at::Tensor &, double, double, double, double, bool, bool, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fused_adam_"; + static constexpr const char* overload_name = "tensor_lr"; + static constexpr const char* schema_str = "_fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()"; + static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); +}; + +struct TORCH_API _fused_adam_out { + using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, double, double, double, double, double, bool, bool, const ::std::optional &, const ::std::optional &, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fused_adam"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_fused_adam.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); +}; + +struct TORCH_API _fused_adam { + using schema = ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, double, double, double, double, double, bool, bool, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fused_adam"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_fused_adam(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)"; + static ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); + static ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); +}; + +struct TORCH_API _fused_adam_tensor_lr_out { + using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, const at::Tensor &, double, double, double, double, bool, bool, const ::std::optional &, const ::std::optional &, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fused_adam"; + static constexpr const char* overload_name = "tensor_lr_out"; + static constexpr const char* schema_str = "_fused_adam.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); +}; + +struct TORCH_API _fused_adam_tensor_lr { + using schema = ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, const at::Tensor &, double, double, double, double, bool, bool, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fused_adam"; + static constexpr const char* overload_name = "tensor_lr"; + static constexpr const char* schema_str = "_fused_adam.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)"; + static ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); + static ::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_rms_norm_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_rms_norm_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..e68188a1597dfb196f85ed77c5bf054674217adb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_rms_norm_backward.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_fused_rms_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor rstd, Tensor? weight, bool[2] output_mask) -> (Tensor, Tensor) +inline ::std::tuple _fused_rms_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, at::IntArrayRef normalized_shape, const at::Tensor & rstd, const ::std::optional & weight, ::std::array output_mask) { + return at::_ops::_fused_rms_norm_backward::call(grad_out, input, normalized_shape, rstd, weight, output_mask); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_sgd_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_sgd_native.h new file mode 100644 index 0000000000000000000000000000000000000000..69be06e0332ff58d9149a175356e8c6eb11155e4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_sgd_native.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_out(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); +TORCH_API void _fused_sgd_kernel_cpu_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_kernel_cuda_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API ::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_tensor_lr_out(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); +TORCH_API void _fused_sgd_kernel_cpu_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_kernel_cuda_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_copy_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_copy_native.h new file mode 100644 index 0000000000000000000000000000000000000000..059f41911f3d55cc37b9b6d9da89c067d5985298 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_copy_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _fw_primal_copy_out(const at::Tensor & self, int64_t level, at::Tensor & out); +TORCH_API at::Tensor _fw_primal_copy(const at::Tensor & self, int64_t level); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8a294abafa4537c004b816a0fda4adf48b0f009b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _fw_primal { + using schema = at::Tensor (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fw_primal"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_fw_primal(Tensor(a) self, int level) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, int64_t level); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t level); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_grouped_mm_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_grouped_mm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6d814108e192fd573e14d98d3b8881826e3582f2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_grouped_mm_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _grouped_mm(const at::Tensor & self, const at::Tensor & mat2, const ::std::optional & offs={}, const ::std::optional & bias={}, ::std::optional out_dtype=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_has_compatible_shallow_copy_type_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_has_compatible_shallow_copy_type_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..0c5467659e037beca65e47de47735d752c93058e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_has_compatible_shallow_copy_type_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _has_compatible_shallow_copy_type { + using schema = bool (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_has_compatible_shallow_copy_type"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_has_compatible_shallow_copy_type(Tensor self, Tensor from) -> bool"; + static bool call(const at::Tensor & self, const at::Tensor & from); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & from); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_has_same_storage_numel_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_has_same_storage_numel_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..da42843b41e00c7b598c360a93fff8ec18ae5a76 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_has_same_storage_numel_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API bool _has_same_storage_numel(const at::Tensor & self, const at::Tensor & other); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_index_put_impl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_index_put_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..817d94a7c8888141bb174579e496df666f47edba --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_index_put_impl.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!) +inline at::Tensor & _index_put_impl_(at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false, bool unsafe=false) { + return at::_ops::_index_put_impl_::call(self, indices, values, accumulate, unsafe); +} + +// aten::_index_put_impl.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _index_put_impl_out(at::Tensor & out, const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false, bool unsafe=false) { + return at::_ops::_index_put_impl_out::call(self, indices, values, accumulate, unsafe, out); +} +// aten::_index_put_impl.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _index_put_impl_outf(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate, bool unsafe, at::Tensor & out) { + return at::_ops::_index_put_impl_out::call(self, indices, values, accumulate, unsafe, out); +} + +// aten::_index_put_impl(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor +inline at::Tensor _index_put_impl(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false, bool unsafe=false) { + return at::_ops::_index_put_impl::call(self, indices, values, accumulate, unsafe); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_indices_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_indices_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f23b7e36652a66851b1f51410c36d711bc5013e8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_indices_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _indices_sparse(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_is_all_true.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_is_all_true.h new file mode 100644 index 0000000000000000000000000000000000000000..f164af4080144d4eee050e1eb338c79d6b90a3ca --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_is_all_true.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_is_all_true(Tensor self) -> Tensor +inline at::Tensor _is_all_true(const at::Tensor & self) { + return at::_ops::_is_all_true::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_is_any_true_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_is_any_true_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e4ab25ecf8dc5a0996401d0ca5700f2e265f8487 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_is_any_true_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _is_any_true { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_is_any_true"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_is_any_true(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_check_errors_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_check_errors_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e95d4879041920da7bf7fd8b6ccd9e51b421b8aa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_check_errors_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _linalg_check_errors(const at::Tensor & info, c10::string_view api_name, bool is_matrix); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_det_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_det_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..50544efcc052c94047080991aa2c3984678cc6e8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_det_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _linalg_det { + using schema = ::std::tuple (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_linalg_det"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_linalg_det(Tensor A) -> (Tensor result, Tensor LU, Tensor pivots)"; + static ::std::tuple call(const at::Tensor & A); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & A); +}; + +struct TORCH_API _linalg_det_result { + using schema = ::std::tuple (const at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_linalg_det"; + static constexpr const char* overload_name = "result"; + static constexpr const char* schema_str = "_linalg_det.result(Tensor A, *, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots)"; + static ::std::tuple call(const at::Tensor & A, at::Tensor & result, at::Tensor & LU, at::Tensor & pivots); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & A, at::Tensor & result, at::Tensor & LU, at::Tensor & pivots); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_eigh_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_eigh_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9ac004633d68f78cedb79a881da9a9deb1d0bb71 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_eigh_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API ::std::tuple _linalg_eigh(const at::Tensor & A, c10::string_view UPLO="L", bool compute_v=true); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_slogdet_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_slogdet_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f30baaf39f5d54367caf57d97b33f5b1abe584a2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_slogdet_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _linalg_slogdet { + using schema = ::std::tuple (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_linalg_slogdet"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet, Tensor LU, Tensor pivots)"; + static ::std::tuple call(const at::Tensor & A); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & A); +}; + +struct TORCH_API _linalg_slogdet_sign { + using schema = ::std::tuple (const at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_linalg_slogdet"; + static constexpr const char* overload_name = "sign"; + static constexpr const char* schema_str = "_linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)"; + static ::std::tuple call(const at::Tensor & A, at::Tensor & sign, at::Tensor & logabsdet, at::Tensor & LU, at::Tensor & pivots); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & A, at::Tensor & sign, at::Tensor & logabsdet, at::Tensor & LU, at::Tensor & pivots); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_svd_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_svd_native.h new file mode 100644 index 0000000000000000000000000000000000000000..27b1a2bee72c1ae1f92c3d5fe14303605094ee5c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_svd_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured__linalg_svd_out : public at::meta::structured__linalg_svd { +void impl(const at::Tensor & A, bool full_matrices, bool compute_uv, ::std::optional driver, const at::Tensor & U, const at::Tensor & S, const at::Tensor & Vh); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_lstm_mps_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_lstm_mps_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..869edb3e0b7952d3e0a5154190c1b950c13d73c5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_lstm_mps_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _lstm_mps { + using schema = ::std::tuple (const at::Tensor &, at::TensorList, at::TensorList, bool, int64_t, double, bool, bool, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_lstm_mps"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_lstm_mps(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); +}; + +struct TORCH_API _lstm_mps_out { + using schema = ::std::tuple (const at::Tensor &, at::TensorList, at::TensorList, bool, int64_t, double, bool, bool, bool, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_lstm_mps"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_lstm_mps.out(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4, Tensor(f!) out5) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!), Tensor(f!))"; + static ::std::tuple call(const at::Tensor & input, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4, at::Tensor & out5); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4, at::Tensor & out5); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_make_dual_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_make_dual_native.h new file mode 100644 index 0000000000000000000000000000000000000000..38b601225f0748eca436fbc8b76f3ac661305bf6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_make_dual_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _make_dual(const at::Tensor & primal, const at::Tensor & tangent, int64_t level); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_mixed_dtypes_linear_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_mixed_dtypes_linear_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c8aef6c8a39231b151fbac1c7b6538be53904951 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_mixed_dtypes_linear_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _mixed_dtypes_linear(const at::Tensor & input, const at::Tensor & weight, const at::Tensor & scale, const ::std::optional & bias={}, ::std::optional activation=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_mkldnn_reshape_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_mkldnn_reshape_native.h new file mode 100644 index 0000000000000000000000000000000000000000..7c32f3cbc6f6e5c34081db8e1f77bcd8f4de727c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_mkldnn_reshape_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _mkldnn_reshape_out(const at::Tensor & self, at::IntArrayRef shape, at::Tensor & out); +TORCH_API at::Tensor mkldnn_reshape(const at::Tensor & self, at::IntArrayRef shape); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_native_batch_norm_legit_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_native_batch_norm_legit_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8b4c04a0961d503ca2d811e46f41924ebc160211 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_native_batch_norm_legit_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _native_batch_norm_legit_functional(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const at::Tensor & running_mean, const at::Tensor & running_var, bool training, double momentum, double eps); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_native_multi_head_attention_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_native_multi_head_attention_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e7c4dd0009f3ce6dae3ba1f6d02a3842d8b8499e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_native_multi_head_attention_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional & mask={}, bool need_weights=true, bool average_attn_weights=true, ::std::optional mask_type=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..84da33281b26f6a948762765f6d22b578fbe06e2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _neg_view(const at::Tensor & self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_copy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..2d672eca61113a23cbd6eb2b4bc89f035bddc255 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _neg_view_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_neg_view_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_neg_view_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _neg_view_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_neg_view_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_neg_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_compute_contiguous_strides_offsets_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_compute_contiguous_strides_offsets_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d53f58291d86044d29cb302b53edab3cd870f628 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_compute_contiguous_strides_offsets_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _nested_compute_contiguous_strides_offsets(const at::Tensor & nested_size); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_lengths_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_lengths_native.h new file mode 100644 index 0000000000000000000000000000000000000000..0463f07d0019e4a3a7c2e520af1d14cb76e15912 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_lengths_native.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_min_seqlen_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_min_seqlen_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..71b7ff400cd62816ce297b595c0f3739412f4db7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_min_seqlen_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _nested_get_min_seqlen { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_nested_get_min_seqlen"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_nested_get_min_seqlen(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_ragged_idx.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_ragged_idx.h new file mode 100644 index 0000000000000000000000000000000000000000..a3cab297d4530b934c3d86f74128b4465a3cead5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_ragged_idx.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_nested_get_ragged_idx(Tensor self) -> int +inline int64_t _nested_get_ragged_idx(const at::Tensor & self) { + return at::_ops::_nested_get_ragged_idx::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_values_copy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_values_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5d999ada5b8f632fbb5c9e64beb73320d542d23a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_values_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _nested_get_values_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_nested_get_values_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_nested_get_values_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _nested_get_values_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_nested_get_values_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_nested_get_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned_native.h new file mode 100644 index 0000000000000000000000000000000000000000..293c03d886fdf51e7066ee164bd3e9c306fa1d90 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool NestedTensor_nested_tensor_from_mask_left_aligned(const at::Tensor & t, const at::Tensor & mask); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_buffer_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_buffer_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f41b2cc464280d80dc20ff324a36ece2a79b4517 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_buffer_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _nested_view_from_buffer(const at::Tensor & self, const at::Tensor & nested_size, const at::Tensor & nested_strides, const at::Tensor & offsets); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_jagged_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_jagged_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ecf9d3c78962ff9ba59c6f735f92aaa3e834177c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_jagged_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _nested_view_from_jagged { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, int64_t, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_nested_view_from_jagged"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & offsets, const at::Tensor & dummy, const ::std::optional & lengths, int64_t ragged_idx, const ::std::optional & min_seqlen, const ::std::optional & max_seqlen); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & offsets, const at::Tensor & dummy, const ::std::optional & lengths, int64_t ragged_idx, const ::std::optional & min_seqlen, const ::std::optional & max_seqlen); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_native.h new file mode 100644 index 0000000000000000000000000000000000000000..79055b298fdfe4518840dcce70972bdb10636488 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool _nnpack_available(); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b7b738b548c3e52a42ce925f5684e3c5002ea668 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _nnpack_available { + using schema = bool (); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_nnpack_available"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_nnpack_available() -> bool"; + static bool call(); + static bool redispatch(c10::DispatchKeySet dispatchKeySet); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnz_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnz_native.h new file mode 100644 index 0000000000000000000000000000000000000000..7a761516e6c99243b396038b86192ba7263d7d2b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_nnz_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API int64_t _nnz_sparse(const at::Tensor & self); +TORCH_API int64_t _nnz_sparse_csr(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_pad_enum.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_pad_enum.h new file mode 100644 index 0000000000000000000000000000000000000000..62c018aa72acf1964441dd4de9a730b701d63b1b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_pad_enum.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_pad_enum(Tensor self, SymInt[] pad, int mode, float? value=None) -> Tensor +inline at::Tensor _pad_enum(const at::Tensor & self, at::IntArrayRef pad, int64_t mode, ::std::optional value=::std::nullopt) { + return at::_ops::_pad_enum::call(self, c10::fromIntArrayRefSlow(pad), mode, value); +} +namespace symint { + template >> + at::Tensor _pad_enum(const at::Tensor & self, at::IntArrayRef pad, int64_t mode, ::std::optional value=::std::nullopt) { + return at::_ops::_pad_enum::call(self, c10::fromIntArrayRefSlow(pad), mode, value); + } +} + +// aten::_pad_enum(Tensor self, SymInt[] pad, int mode, float? value=None) -> Tensor +inline at::Tensor _pad_enum_symint(const at::Tensor & self, c10::SymIntArrayRef pad, int64_t mode, ::std::optional value=::std::nullopt) { + return at::_ops::_pad_enum::call(self, pad, mode, value); +} +namespace symint { + template >> + at::Tensor _pad_enum(const at::Tensor & self, c10::SymIntArrayRef pad, int64_t mode, ::std::optional value=::std::nullopt) { + return at::_ops::_pad_enum::call(self, pad, mode, value); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_remove_batch_dim_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_remove_batch_dim_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a91f68e8010d0c4ecd3e45eb0eef57dbe87711f4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_remove_batch_dim_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _remove_batch_dim(const at::Tensor & self, int64_t level, int64_t batch_size, int64_t out_dim); +TORCH_API at::Tensor _remove_batch_dim_symint(const at::Tensor & self, int64_t level, c10::SymInt batch_size, int64_t out_dim); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_alias_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_alias_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ae025b3bb26ea6c8025c844ae1455f528c3983dc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_alias_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _reshape_alias(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride); +TORCH_API at::Tensor _reshape_alias_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_alias_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_alias_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..584c3b1f737bcf6678379f4395bde283f0ef3f2c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_alias_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _reshape_alias(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride); +TORCH_API at::Tensor _reshape_alias_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_copy_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_copy_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6333c85c5f6f0a8a8204737b8d1f7fc9360a692a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_copy_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _reshape_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_copy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..39a985a1dfd9a5849611510c0a54639f1622b32a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_copy_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _reshape_copy { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_reshape_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_reshape_copy(Tensor self, SymInt[] size) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef size); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef size); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_from_tensor_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_from_tensor_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b7456e17456822937c048b33c9b1032dcceb8b90 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_reshape_from_tensor_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _reshape_from_tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_reshape_from_tensor"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_reshape_from_tensor(Tensor self, Tensor shape) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & shape); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & shape); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_resize_output_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_resize_output_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..55416754c267359e964e7ee4b62159f503b8b99e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_resize_output_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _resize_output(const at::Tensor & self, at::IntArrayRef size, at::Device device); +TORCH_API at::Tensor _resize_output_symint(const at::Tensor & self, c10::SymIntArrayRef size, at::Device device); +TORCH_API const at::Tensor & _resize_output_out(const at::Tensor & out, const at::Tensor & self, at::IntArrayRef size, at::Device device); +TORCH_API const at::Tensor & _resize_output_outf(const at::Tensor & self, at::IntArrayRef size, at::Device device, const at::Tensor & out); +TORCH_API const at::Tensor & _resize_output_symint_out(const at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size, at::Device device); +TORCH_API const at::Tensor & _resize_output_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, at::Device device, const at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_resize_output_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_resize_output_native.h new file mode 100644 index 0000000000000000000000000000000000000000..9835447413067c6db08970d5dcd0ecc051f1386e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_resize_output_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _resize_output_symint(const at::Tensor & self, c10::SymIntArrayRef size, at::Device device); +TORCH_API const at::Tensor & _resize_output_out_symint(const at::Tensor & self, c10::SymIntArrayRef size, at::Device device, const at::Tensor & out); +TORCH_API const at::Tensor & _resize_output_(const at::Tensor & self, at::IntArrayRef size, at::Device device); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2ac9fadf988111129946ada737bd72e2fd4e065c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _safe_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_cudnn_attention_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_cudnn_attention_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a627e20eda53e7a74f4ec7916d1c5bca5069924d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_cudnn_attention_backward_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _scaled_dot_product_cudnn_attention_backward_cuda(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & philox_seed, const at::Tensor & philox_offset, const at::Tensor & attn_bias, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, bool is_causal, ::std::optional scale=::std::nullopt); +TORCH_API ::std::tuple _scaled_dot_product_cudnn_attention_nestedtensor_backward_cuda(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & philox_seed, const at::Tensor & philox_offset, const at::Tensor & attn_bias, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, bool is_causal, ::std::optional scale=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_cudnn_attention_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_cudnn_attention_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..92437bc2141b19ed04aafef380123dfba3a1fbae --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_cudnn_attention_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _scaled_dot_product_cudnn_attention { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, bool, double, bool, bool, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_scaled_dot_product_cudnn_attention"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)"; + static ::std::tuple call(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const ::std::optional & attn_bias, bool compute_log_sumexp, double dropout_p, bool is_causal, bool return_debug_mask, ::std::optional scale); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const ::std::optional & attn_bias, bool compute_log_sumexp, double dropout_p, bool is_causal, bool return_debug_mask, ::std::optional scale); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..01f3c1869058e03962ffcd91e5f80474807d67c3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _scaled_dot_product_flash_attention_for_cpu_backward(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, double dropout_p, bool is_causal, const ::std::optional & attn_mask={}, ::std::optional scale=::std::nullopt); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ff938f225e690595d866a26d9e6c84d86ebe8344 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _scaled_dot_product_flash_attention_for_cpu_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, double, bool, const ::std::optional &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_scaled_dot_product_flash_attention_for_cpu_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)"; + static ::std::tuple call(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, double dropout_p, bool is_causal, const ::std::optional & attn_mask, ::std::optional scale); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, double dropout_p, bool is_causal, const ::std::optional & attn_mask, ::std::optional scale); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3eee1d697518f54995e06c173e7d7a4395195a33 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _scaled_dot_product_fused_attention_overrideable_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, ::std::array, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymInt, c10::SymInt, double, bool, const at::Tensor &, const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_scaled_dot_product_fused_attention_overrideable_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)"; + static ::std::tuple call(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & attn_bias, ::std::array grad_input_mask, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const at::Tensor & philox_seed, const at::Tensor & philox_offset, ::std::optional scale); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & attn_bias, ::std::array grad_input_mask, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const at::Tensor & philox_seed, const at::Tensor & philox_offset, ::std::optional scale); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_mm_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_mm_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fd47e0e4d083d54871f2873b8532a336e6bd7396 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_mm_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _scaled_mm(const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scale_a, const at::Tensor & scale_b, const ::std::optional & bias={}, const ::std::optional & scale_result={}, ::std::optional out_dtype=::std::nullopt, bool use_fast_accum=false); +TORCH_API at::Tensor & _scaled_mm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scale_a, const at::Tensor & scale_b, const ::std::optional & bias={}, const ::std::optional & scale_result={}, ::std::optional out_dtype=::std::nullopt, bool use_fast_accum=false); +TORCH_API at::Tensor & _scaled_mm_outf(const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scale_a, const at::Tensor & scale_b, const ::std::optional & bias, const ::std::optional & scale_result, ::std::optional out_dtype, bool use_fast_accum, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ff9fc9ddb9821271559da9c2f0501d60c21e0a5e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _slow_conv2d_forward(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding); +TORCH_API at::Tensor _slow_conv2d_forward_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & _slow_conv2d_forward_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding); +TORCH_API at::Tensor & _slow_conv2d_forward_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output); +TORCH_API at::Tensor & _slow_conv2d_forward_symint_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & _slow_conv2d_forward_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fdd9bc211d90a469dda59594c3ea1a1f4241265b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor & _sobol_engine_initialize_state_(at::Tensor & self, int64_t dimension); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h new file mode 100644 index 0000000000000000000000000000000000000000..06f2c97abd716784d1651dea8fdb640c77571c19 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_softmax_backward_cpu_out : public at::meta::structured__softmax_backward_data { +void impl(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, const at::Tensor & grad_input); +}; +struct TORCH_API structured_softmax_backward_cuda_out : public at::meta::structured__softmax_backward_data { +void impl(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, const at::Tensor & grad_input); +}; +TORCH_API at::Tensor nested_softmax_backward(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c33e482f9b0ea0edfce5f8ead9bd10a46c51ed08 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e97c11d3f3376bccc9770f874d598ce6b97a0bf0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_compressed_tensor_unsafe_symint(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h new file mode 100644 index 0000000000000000000000000000000000000000..fe8760fcb01e6543cd18a3ff8c5b70b4b5e117dd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_coo_tensor_unsafe_symint(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}, ::std::optional is_coalesced=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f714b064cdd1c27145589f79188662e2e325b4f9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional is_coalesced=::std::nullopt); +TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); +TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional is_coalesced=::std::nullopt); +TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h new file mode 100644 index 0000000000000000000000000000000000000000..0643e714a35b1ad5a72bb7f2a0ea2be02a98dfdf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_csc_tensor_unsafe::call(ccol_indices, row_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_csc_tensor_unsafe::call(ccol_indices, row_indices, values, size, dtype, layout, device, pin_memory); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h new file mode 100644 index 0000000000000000000000000000000000000000..3e44a0c449eb3bb2a7ef0d16bca5992d1d5b03cc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor _sparse_csr_sum(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_csr_sum_dim_dtype::call(self, dim, keepdim, dtype); +} + +// aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_csr_sum_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_csr_sum_dim_dtype_out::call(self, dim, keepdim, dtype, out); +} +// aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_csr_sum_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out) { + return at::_ops::_sparse_csr_sum_dim_dtype_out::call(self, dim, keepdim, dtype, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..988d130fcbe0daf6580d0b2395f5e91a3172b5b8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_log_softmax_backward_data_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +TORCH_API at::Tensor & _sparse_log_softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a024d73dc676d7e32548e3bba054e390852f3eca --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & _sparse_log_softmax_out(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); +TORCH_API at::Tensor log_softmax_sparse_cpu(const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor log_softmax_sparse_cuda(const at::Tensor & self, int64_t dim, bool half_to_float); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c62dd2b281cbe724eac227e1ca7bd95be96037dd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_mm_reduce_impl_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::string_view, const at::Tensor &, ::std::array); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_mm_reduce_impl_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_mm_reduce_impl_backward(Tensor self, Tensor grad_out, Tensor weight, str reduce, Tensor arg_out, bool[2] output_mask) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..748f9073708da0c88d729423f0e7ac49e2af1cc4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_apply { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_apply"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, const at::Tensor & thread_masks); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & thread_masks); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ca26cc6ec93e3c63cfa1997b36d38a77889fb66d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_semi_structured_linear(const at::Tensor & input, const at::Tensor & weight, const at::Tensor & meta, const ::std::optional & bias={}, ::std::optional activation=::std::nullopt, ::std::optional out_dtype=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..505b1848ef7fa9d1bbc7a4c2f73c03aa8c95755d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _sparse_semi_structured_tile(const at::Tensor & input, c10::string_view algorithm="", bool use_cutlass=true); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..375c76c840f998b4f69cf5b8d551232d5b483ed0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_tile { + using schema = ::std::tuple (const at::Tensor &, c10::string_view, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_tile"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_tile(Tensor input, str algorithm=\"\", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, c10::string_view algorithm, bool use_cutlass); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, c10::string_view algorithm, bool use_cutlass); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_sum_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_sum_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6b50153445c835be9d18b02510fe718aea96ee7f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_sum_backward_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _sparse_sum_backward_out(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); +TORCH_API at::Tensor _sparse_sum_backward_cpu(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim); +TORCH_API at::Tensor _sparse_sum_backward_cuda(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_sum_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_sum_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..5870bd78408451fb249cf2c483e7f106c5a63bb6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_sum_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self); +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::ScalarType dtype); +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim, at::ScalarType dtype); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_spdiags_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_spdiags_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9e9dc4f10e7c27796c55c43c9b297c778c666a3e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_spdiags_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _spdiags { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_spdiags"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_spdiags(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None) -> Tensor"; + static at::Tensor call(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout); +}; + +struct TORCH_API _spdiags_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_spdiags"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_spdiags.out(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b8e638fcf4dca583c24cf6c16a789e6945cc799b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _standard_gamma_grad_out(const at::Tensor & self, const at::Tensor & output, at::Tensor & out); +TORCH_API at::Tensor _standard_gamma_grad_cpu(const at::Tensor & self, const at::Tensor & output); +TORCH_API at::Tensor _standard_gamma_grad_cuda(const at::Tensor & self, const at::Tensor & output); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5e7fac175591daf4ad77ebb517280be10317a123 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _standard_gamma_grad { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma_grad"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_standard_gamma_grad(Tensor self, Tensor output) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & output); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & output); +}; + +struct TORCH_API _standard_gamma_grad_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma_grad"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_standard_gamma_grad.out(Tensor self, Tensor output, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & output, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & output, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c43e3731438b9f88bdd6355bc2ff86faef870103 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_ambiguous_defaults_a { + using schema = at::Tensor (const at::Tensor &, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_ambiguous_defaults"; + static constexpr const char* overload_name = "a"; + static constexpr const char* schema_str = "_test_ambiguous_defaults.a(Tensor dummy, int a=1, int b=1) -> Tensor"; + static at::Tensor call(const at::Tensor & dummy, int64_t a, int64_t b); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dummy, int64_t a, int64_t b); +}; + +struct TORCH_API _test_ambiguous_defaults_b { + using schema = at::Tensor (const at::Tensor &, int64_t, c10::string_view); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_ambiguous_defaults"; + static constexpr const char* overload_name = "b"; + static constexpr const char* schema_str = "_test_ambiguous_defaults.b(Tensor dummy, int a=2, str b=\"2\") -> Tensor"; + static at::Tensor call(const at::Tensor & dummy, int64_t a, c10::string_view b); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dummy, int64_t a, c10::string_view b); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f99343056c04f4b7aa47204d717b09368c8be442 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _test_autograd_multiple_dispatch_view(const at::Tensor & self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_floatlist_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_floatlist_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..173438b4df9d77a941e763ee8f05662212b75612 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_floatlist_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _test_optional_floatlist(const at::Tensor & values, ::std::optional> addends); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..39a4c53adf9cd74f8fb07286093a8662f5215465 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_optional_intlist { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_intlist"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_optional_intlist(Tensor values, int[]? addends) -> Tensor"; + static at::Tensor call(const at::Tensor & values, at::OptionalIntArrayRef addends); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends); +}; + +struct TORCH_API _test_optional_intlist_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_intlist"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_optional_intlist.out(Tensor values, int[]? addends, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..63f120845a22502351f675d77af0f4ff5f1cb055 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _test_string_default(const at::Tensor & dummy, c10::string_view a="\"'\\", c10::string_view b="\"'\\"); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..2c592c9f0d48fcd30282131dfacd63c8d8785c77 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_differentiable_lstm_cell_backward { + using schema = ::std::tuple (const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_differentiable_lstm_cell_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f53a8e35fecca34eee858d1de1eefea566c89514 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _thnn_fused_gru_cell_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}); +TORCH_API ::std::tuple _thnn_fused_gru_cell_outf(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..75e708b50cbe704e84145848cb2a621f3d120148 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_fused_lstm_cell { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_fused_lstm_cell"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); +}; + +struct TORCH_API _thnn_fused_lstm_cell_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_fused_lstm_cell"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_thnn_fused_lstm_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f1075534b76ab207962d11087409405cd79cc6d6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h new file mode 100644 index 0000000000000000000000000000000000000000..87f7dfc1ff2ece7eaba0a4cbd320011db6b52c81 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_sparse_bsc_out(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor coo_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor sparse_compressed_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_csc.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_csc.h new file mode 100644 index 0000000000000000000000000000000000000000..e0e7dc1b3e7ad9676383f026fb05c2ffd06f4eb6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_csc.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csc_out(at::Tensor & out, const at::Tensor & self, ::std::optional dense_dim=::std::nullopt) { + return at::_ops::_to_sparse_csc_out::call(self, dense_dim, out); +} +// aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csc_outf(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out) { + return at::_ops::_to_sparse_csc_out::call(self, dense_dim, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7b7b66faab768e738f1a355b71b70dcf69a5a2a0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _to_sparse(const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor _to_sparse(const at::Tensor & self, ::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h new file mode 100644 index 0000000000000000000000000000000000000000..fd79b2758a5f6f3eff32c4945c6cbaed3f72be55 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor) +inline ::std::tuple _to_sparse_semi_structured(const at::Tensor & dense) { + return at::_ops::_to_sparse_semi_structured::call(dense); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h new file mode 100644 index 0000000000000000000000000000000000000000..f58b18e8f3baba28cd093124c07da0b732540ec1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor +inline at::Tensor _triton_scaled_dot_attention(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0) { + return at::_ops::_triton_scaled_dot_attention::call(q, k, v, dropout_p); +} + +// aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _triton_scaled_dot_attention_out(at::Tensor & out, const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0) { + return at::_ops::_triton_scaled_dot_attention_out::call(q, k, v, dropout_p, out); +} +// aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _triton_scaled_dot_attention_outf(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p, at::Tensor & out) { + return at::_ops::_triton_scaled_dot_attention_out::call(q, k, v, dropout_p, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5c425c30cef858f970959b0f9c1d78890ded326b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _unpack_dual { + using schema = ::std::tuple (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_unpack_dual"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)"; + static ::std::tuple call(const at::Tensor & dual, int64_t level); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dual, int64_t level); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..53213a7c208ae08d6838f1c987f969c9d6f77123 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _unsafe_index_Tensor { + using schema = at::Tensor (const at::Tensor &, const c10::List<::std::optional> &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_unsafe_index"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "_unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const c10::List<::std::optional> & indices); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::List<::std::optional> & indices); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_view.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_view.h new file mode 100644 index 0000000000000000000000000000000000000000..eab2ccb948d0f51f57ccbc89f1cdf76621fd68ca --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_view.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_unsafe_view(Tensor self, SymInt[] size) -> Tensor +inline at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size) { + return at::_ops::_unsafe_view::call(self, c10::fromIntArrayRefSlow(size)); +} +namespace symint { + template >> + at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size) { + return at::_ops::_unsafe_view::call(self, c10::fromIntArrayRefSlow(size)); + } +} + +// aten::_unsafe_view(Tensor self, SymInt[] size) -> Tensor +inline at::Tensor _unsafe_view_symint(const at::Tensor & self, c10::SymIntArrayRef size) { + return at::_ops::_unsafe_view::call(self, size); +} +namespace symint { + template >> + at::Tensor _unsafe_view(const at::Tensor & self, c10::SymIntArrayRef size) { + return at::_ops::_unsafe_view::call(self, size); + } +} + +// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _unsafe_view_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size) { + return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out); +} +namespace symint { + template >> + at::Tensor & _unsafe_view_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size) { + return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out); + } +} + +// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _unsafe_view_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out) { + return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out); +} +namespace symint { + template >> + at::Tensor & _unsafe_view_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out) { + return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out); + } +} + +// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _unsafe_view_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size) { + return at::_ops::_unsafe_view_out::call(self, size, out); +} +namespace symint { + template >> + at::Tensor & _unsafe_view_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size) { + return at::_ops::_unsafe_view_out::call(self, size, out); + } +} + +// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _unsafe_view_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out) { + return at::_ops::_unsafe_view_out::call(self, size, out); +} +namespace symint { + template >> + at::Tensor & _unsafe_view_outf(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out) { + return at::_ops::_unsafe_view_out::call(self, size, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..e88d05badd8ca81e2cccab4d15a7babb09a13058 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_bicubic2d_aa_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..eb802d863f20a07c64b540b5bf3fb9ff822fa222 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_bilinear2d_aa_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3b4dbbbf4ee7b1a6c3874e23642db2f0cda67641 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2fba9da16aac666049de23ff512fc1f15e725a50 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors); +struct TORCH_API structured__upsample_nearest_exact1d_out_cpu : public at::meta::structured__upsample_nearest_exact1d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales, const at::Tensor & out); +}; +struct TORCH_API structured__upsample_nearest_exact1d_out_cuda : public at::meta::structured__upsample_nearest_exact1d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d.h new file mode 100644 index 0000000000000000000000000000000000000000..d4cd4c1f51eea2efdd816feb159ee1d5d8e59729 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor +inline at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors); + } +} + +// aten::_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor +inline at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size, scale_factors); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w); + } +} + +// aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, output_size, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, output_size, scales_h, scales_w); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..59e12db7b6b9a49fc86e8ee07d30898b312beff7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_nearest_exact2d : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f51637bedd0660bce2c337de0252ba798bbc0e47 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_nearest_exact3d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); +TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..56f4d53d6c5d8b8ca4319d0ed3aeae140fe344ea --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API void _validate_compressed_sparse_indices(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2c337a3d70c9f8c872dfb1c0dfa7134fbb5ac032 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_sparse_coo_tensor_args(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional is_coalesced=::std::nullopt, ::std::optional check_pinning=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_values_copy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_values_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..650d8f96de3e9159123ce60a2b2cdca5219a3b53 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_values_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _values_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_values_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_values_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _values_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_values_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ca668d18900e7fa6876e928f0f381d7be4151b52 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API int64_t _version(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h new file mode 100644 index 0000000000000000000000000000000000000000..ccc931e66457e1e241ed6601a3bff9a5dcb0e2f2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor +inline at::Tensor _weight_int4pack_mm_with_scales_and_zeros(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScale, const at::Tensor & qZeros) { + return at::_ops::_weight_int4pack_mm_with_scales_and_zeros::call(self, mat2, qGroupSize, qScale, qZeros); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..47dc303a7b5dd3828b657bfe7af5399d5ccc1db4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _weight_int8pack_mm(const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scales); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f48affb80efba30a742c4085b0717fe6893da769 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _weight_norm_interface_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_norm_interface_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_norm_interface_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d52db1eb8f2d8479aa337bce75055dddc26f1c33 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_norm_interface_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_norm_interface { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm_interface"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & v, const at::Tensor & g, int64_t dim); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & v, const at::Tensor & g, int64_t dim); +}; + +struct TORCH_API _weight_norm_interface_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm_interface"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_weight_norm_interface.out(Tensor v, Tensor g, int dim=0, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))"; + static ::std::tuple call(const at::Tensor & v, const at::Tensor & g, int64_t dim, at::Tensor & out0, at::Tensor & out1); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & v, const at::Tensor & g, int64_t dim, at::Tensor & out0, at::Tensor & out1); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked.h new file mode 100644 index 0000000000000000000000000000000000000000..dd96dc2e582980b4ca2488bfa612e40b423295f2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor +inline at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel) { + return at::_ops::_wrapped_quantized_linear_prepacked::call(input, input_scale, input_zero_point, packed_weight, output_scale, output_zero_point, out_channel); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a81eb440a7c68263c87ddfe2aa330d98a967971c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/abs.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/abs.h new file mode 100644 index 0000000000000000000000000000000000000000..f71b287312338c247c0fa73b9da8904108cc567d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/abs.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::abs(Tensor self) -> Tensor +inline at::Tensor abs(const at::Tensor & self) { + return at::_ops::abs::call(self); +} + +// aten::abs_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & abs_(at::Tensor & self) { + return at::_ops::abs_::call(self); +} + +// aten::abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & abs_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::abs_out::call(self, out); +} +// aten::abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & abs_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::abs_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/absolute.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/absolute.h new file mode 100644 index 0000000000000000000000000000000000000000..73de17c9f07aba69e1a53d0161f9dae388fa0923 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/absolute.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::absolute(Tensor self) -> Tensor +inline at::Tensor absolute(const at::Tensor & self) { + return at::_ops::absolute::call(self); +} + +// aten::absolute.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & absolute_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::absolute_out::call(self, out); +} +// aten::absolute.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & absolute_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::absolute_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..726187c78208dd82836457c2e8fdbc6dc46f2218 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor acosh(const at::Tensor & self); +TORCH_API at::Tensor & acosh_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..a69f8e31f84385be031bf87486854421871cfdf3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_acosh : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_native.h new file mode 100644 index 0000000000000000000000000000000000000000..84bdd2b1b61b121b815fc90df3ed2170ef4f7c2b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/acosh_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_acosh_out : public at::meta::structured_acosh { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_avg_pool3d.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_avg_pool3d.h new file mode 100644 index 0000000000000000000000000000000000000000..e51e694346350608846f08ff310bcdbfdd02137f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_avg_pool3d.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & adaptive_avg_pool3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); + } +} + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & adaptive_avg_pool3d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); + } +} + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & adaptive_avg_pool3d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); + } +} + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & adaptive_avg_pool3d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); + } +} + +// aten::adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor +inline at::Tensor adaptive_avg_pool3d(const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, c10::fromIntArrayRefSlow(output_size)); +} +namespace symint { + template >> + at::Tensor adaptive_avg_pool3d(const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, c10::fromIntArrayRefSlow(output_size)); + } +} + +// aten::adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor +inline at::Tensor adaptive_avg_pool3d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, output_size); +} +namespace symint { + template >> + at::Tensor adaptive_avg_pool3d(const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, output_size); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool1d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool1d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c0e8415486f740c4d3a9870c4b3345cfcf52a880 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool1d_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple adaptive_max_pool1d(const at::Tensor & self, at::IntArrayRef output_size); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8a7ec7a93c6911d226ee42e781a1709f2f8a55c6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_backward_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor adaptive_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & indices); +TORCH_API at::Tensor & adaptive_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & indices); +TORCH_API at::Tensor & adaptive_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & indices, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..46a5ec97418393c9f8f38ee49bc30c73ed9e3352 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple adaptive_max_pool2d(const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API ::std::tuple adaptive_max_pool2d_out(at::Tensor & out, at::Tensor & indices, const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API ::std::tuple adaptive_max_pool2d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out, at::Tensor & indices); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addcdiv_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addcdiv_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..69db0a143e74d0a0d6dfeda39e5bcae57eb89a2a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addcdiv_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor addcdiv(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1); +TORCH_API at::Tensor & addcdiv_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1); +TORCH_API at::Tensor & addcdiv_outf(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value, at::Tensor & out); +TORCH_API at::Tensor & addcdiv_(at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addmv_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addmv_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b7918d99300b591eb3707a562134751135364696 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addmv_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor addmv(const at::Tensor & self, const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & addmv_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & addmv_outf(const at::Tensor & self, const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); +TORCH_API at::Tensor & addmv_(at::Tensor & self, const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta=1, const at::Scalar & alpha=1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addr_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addr_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b23550d12162ce1e51dda78803bf0627dc91e9cd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/addr_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor addr(const at::Tensor & self, const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & addr_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & addr_outf(const at::Tensor & self, const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); +TORCH_API at::Tensor & addr_(at::Tensor & self, const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta=1, const at::Scalar & alpha=1); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..e1a60453a4478b00b70f42ac6154d8aa4c0930cd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::alias_copy(Tensor self) -> Tensor +inline at::Tensor alias_copy(const at::Tensor & self) { + return at::_ops::alias_copy::call(self); +} + +// aten::alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & alias_copy_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::alias_copy_out::call(self, out); +} +// aten::alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & alias_copy_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::alias_copy_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..27f6d37cb2608c909137e92cdbb1c2d3e04c3ece --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API alias_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::alias_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "alias_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API alias_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::alias_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/align_as.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/align_as.h new file mode 100644 index 0000000000000000000000000000000000000000..d90cfb04ec8fd1fc80fd1a072a235b880d9418f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/align_as.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/all_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/all_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..90fe8943ac0a0928f97c45e901baea0b83db5696 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/all_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor all(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false); +TORCH_API at::Tensor & all_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false); +TORCH_API at::Tensor & all_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/all_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/all_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..087f5eec4ce498346a8fccdcc5cc420e0e289ef0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/all_ops.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API all_dim { + using schema = at::Tensor (const at::Tensor &, int64_t, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dim"; + static constexpr const char* schema_str = "all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t dim, bool keepdim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool keepdim); +}; + +struct TORCH_API all_dims { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dims"; + static constexpr const char* schema_str = "all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim); +}; + +struct TORCH_API all_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & out); +}; + +struct TORCH_API all_dims_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalIntArrayRef, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dims_out"; + static constexpr const char* schema_str = "all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, at::Tensor & out); +}; + +struct TORCH_API all_dimname { + using schema = at::Tensor (const at::Tensor &, at::Dimname, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dimname"; + static constexpr const char* schema_str = "all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::Dimname dim, bool keepdim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, bool keepdim); +}; + +struct TORCH_API all_dimname_out { + using schema = at::Tensor & (const at::Tensor &, at::Dimname, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dimname_out"; + static constexpr const char* schema_str = "all.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Dimname dim, bool keepdim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, bool keepdim, at::Tensor & out); +}; + +struct TORCH_API all { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "all(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API all_all_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "all_out"; + static constexpr const char* schema_str = "all.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/allclose_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/allclose_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5ba5108bcb96f86e71e67b96b3641e38d921e713 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/allclose_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API allclose { + using schema = bool (const at::Tensor &, const at::Tensor &, double, double, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::allclose"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "allclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> bool"; + static bool call(const at::Tensor & self, const at::Tensor & other, double rtol, double atol, bool equal_nan); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, double rtol, double atol, bool equal_nan); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/amin_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/amin_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..270313789abb721917b88e62ce75ea886d1c6e15 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/amin_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor amin(const at::Tensor & self, at::IntArrayRef dim={}, bool keepdim=false); +TORCH_API at::Tensor & amin_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim={}, bool keepdim=false); +TORCH_API at::Tensor & amin_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/amin_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/amin_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3666356a38a3756c2d418f92d54965f7320ab723 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/amin_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_amin_out : public at::meta::structured_amin { +void impl(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax.h new file mode 100644 index 0000000000000000000000000000000000000000..09ca85ffe2803331c2c6bf3f3aa0627223ede533 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::aminmax(Tensor self, *, int? dim=None, bool keepdim=False) -> (Tensor min, Tensor max) +inline ::std::tuple aminmax(const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false) { + return at::_ops::aminmax::call(self, dim, keepdim); +} + +// aten::aminmax.out(Tensor self, *, int? dim=None, bool keepdim=False, Tensor(a!) min, Tensor(b!) max) -> (Tensor(a!) min, Tensor(b!) max) +inline ::std::tuple aminmax_out(at::Tensor & min, at::Tensor & max, const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false) { + return at::_ops::aminmax_out::call(self, dim, keepdim, min, max); +} +// aten::aminmax.out(Tensor self, *, int? dim=None, bool keepdim=False, Tensor(a!) min, Tensor(b!) max) -> (Tensor(a!) min, Tensor(b!) max) +inline ::std::tuple aminmax_outf(const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & min, at::Tensor & max) { + return at::_ops::aminmax_out::call(self, dim, keepdim, min, max); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f05bb3b50b45ac7b2b02a4d4c43dc86d4626d7e0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple aminmax(const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API ::std::tuple aminmax_out(at::Tensor & min, at::Tensor & max, const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API ::std::tuple aminmax_outf(const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & min, at::Tensor & max); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/angle.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/angle.h new file mode 100644 index 0000000000000000000000000000000000000000..805c430ef29f0acfbec26387f38a2fecea2dc040 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/angle.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::angle(Tensor self) -> Tensor +inline at::Tensor angle(const at::Tensor & self) { + return at::_ops::angle::call(self); +} + +// aten::angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & angle_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::angle_out::call(self, out); +} +// aten::angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & angle_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::angle_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arange_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arange_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3021a8513de20002e9d8458043fe0bc4255969dd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arange_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor & arange_out(at::Tensor & out, const at::Scalar & start, const at::Scalar & end, const at::Scalar & step); +TORCH_API at::Tensor & arange_outf(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arange_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arange_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d29004da740199ba87ee2593bcb8f9b07fd4e12e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arange_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor arange(const at::Scalar & end, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +TORCH_API at::Tensor & arange_out(const at::Scalar & end, at::Tensor & out); +TORCH_API at::Tensor arange(const at::Scalar & start, const at::Scalar & end, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +TORCH_API at::Tensor arange(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step=1, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +TORCH_API at::Tensor & arange_out(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step, at::Tensor & out); +TORCH_API at::Tensor & arange_cuda_out(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arccos_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arccos_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..60d201d91e8f07ed6ad10ba8cdc0bfe2930d7978 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arccos_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor arccos(const at::Tensor & self); +TORCH_API at::Tensor & arccos_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & arccos_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & arccos_(at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arccos_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arccos_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..07103c0c4fc136690fe572321eb5dc4c9a88794b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arccos_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API arccos { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::arccos"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "arccos(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API arccos_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::arccos_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "arccos_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API arccos_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::arccos"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "arccos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arcsinh_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arcsinh_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..2f5fdefd337847ab7494edd773195b18c8ac218f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arcsinh_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API arcsinh { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::arcsinh"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "arcsinh(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API arcsinh_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::arcsinh_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "arcsinh_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API arcsinh_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::arcsinh"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "arcsinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arctan_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arctan_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a311807d68e19f66b86e244ad2b2f3f4ed79e313 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/arctan_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor arctan(const at::Tensor & self); +TORCH_API at::Tensor & arctan_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & arctan_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & arctan_(at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c381b9d7b321971201817b0339579bc593cfb5c9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor argmax(const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & argmax_out(at::Tensor & out, const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & argmax_outf(const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2eda6bc19c39370e9a3b6572331c0831075f72a4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_argmax_out : public at::meta::structured_argmax { +void impl(const at::Tensor & self, ::std::optional dim, bool keepdim, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b1c34059668c01a174eaaee234d1e8533ab4054e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API argmax { + using schema = at::Tensor (const at::Tensor &, ::std::optional, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::argmax"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "argmax(Tensor self, int? dim=None, bool keepdim=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional dim, bool keepdim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dim, bool keepdim); +}; + +struct TORCH_API argmax_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::argmax"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "argmax.out(Tensor self, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmin_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmin_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1ebede7c26fd0590aeca84dc2903b671f68ef867 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/argmin_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor argmin(const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & argmin_out(at::Tensor & out, const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & argmin_outf(const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/as_strided_copy_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/as_strided_copy_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f66c725027c96e0ccf657256d3ef171ea5162b18 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/as_strided_copy_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & as_strided_copy_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset=::std::nullopt); +TORCH_API at::Tensor & as_strided_copy_outf(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset, at::Tensor & out); +TORCH_API at::Tensor & as_strided_copy_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset=::std::nullopt); +TORCH_API at::Tensor & as_strided_copy_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/asin_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/asin_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..428968fff99452cbc51bc1ea88cdde43b56c6668 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/asin_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_asin : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6de155c7a80ceab7cbb8d636d8cfbd702c67a4c9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor atan2(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & atan2_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & atan2_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & atan2_(at::Tensor & self, const at::Tensor & other); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..0c304e6de0556b4235dd906dd6078041aab1164e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API atan2_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::atan2"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +struct TORCH_API atan2_ { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::atan2_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Tensor & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API atan2 { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::atan2"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "atan2(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a9de11a123c381f93ca82a3bfad8d3bb3c3736c0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atan_native.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_atan_out : public at::meta::structured_atan { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +TORCH_API at::Tensor atan_sparse(const at::Tensor & self); +TORCH_API at::Tensor & atan_sparse_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & atan_sparse_(at::Tensor & self); +TORCH_API at::Tensor atan_sparse_csr(const at::Tensor & self); +TORCH_API at::Tensor & atan_sparse_csr_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & atan_sparse_csr_(at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atleast_3d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atleast_3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..189a67305ecc1f1ec07e6acd7d5fc7c788eb61a7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/atleast_3d_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor atleast_3d(const at::Tensor & self); +TORCH_API ::std::vector atleast_3d(at::TensorList tensors); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..50e36355f9f69e2d123b5b36ca9b5b5b1f1d926f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor avg_pool1d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, bool ceil_mode=false, bool count_include_pad=true); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm.h new file mode 100644 index 0000000000000000000000000000000000000000..41324edfe602fbc0c1675c83abcd65061fd758d3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm::call(self, batch1, batch2, beta, alpha); +} + +// aten::baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & baddbmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm_out::call(self, batch1, batch2, beta, alpha, out); +} +// aten::baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & baddbmm_outf(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::baddbmm_out::call(self, batch1, batch2, beta, alpha, out); +} + +// aten::baddbmm.dtype(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, at::ScalarType out_dtype, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm_dtype::call(self, batch1, batch2, out_dtype, beta, alpha); +} + +// aten::baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & baddbmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, at::ScalarType out_dtype, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm_dtype_out::call(self, batch1, batch2, out_dtype, beta, alpha, out); +} +// aten::baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & baddbmm_outf(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, at::ScalarType out_dtype, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::baddbmm_dtype_out::call(self, batch1, batch2, out_dtype, beta, alpha, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b51e2171b90847ea6ab3c0299b2a7bfc1d0f6b06 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & baddbmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & baddbmm_outf(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); +TORCH_API at::Tensor & baddbmm_(at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cb970fa2106cb187d5a7251c17ba62bdaa0c7385 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple batch_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, bool update, double eps, ::std::array output_mask, const at::Tensor & reserve); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b72ec59b28c2d40918bf4b76fa5d0bef4bff59f6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & batch_norm_backward_elemt_out(at::Tensor & out, const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const ::std::optional & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count); +TORCH_API at::Tensor & batch_norm_backward_elemt_outf(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const ::std::optional & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bernoulli_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bernoulli_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..65c7027d2e996be80c075a8f72460939d93dbd2c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bernoulli_ops.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API bernoulli { + using schema = at::Tensor (const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "bernoulli(Tensor self, *, Generator? generator=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional generator); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional generator); +}; + +struct TORCH_API bernoulli_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "bernoulli.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional generator, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional generator, at::Tensor & out); +}; + +struct TORCH_API bernoulli__Tensor { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli_"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Tensor & p, ::std::optional generator); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & p, ::std::optional generator); +}; + +struct TORCH_API bernoulli__float { + using schema = at::Tensor & (at::Tensor &, double, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli_"; + static constexpr const char* overload_name = "float"; + static constexpr const char* schema_str = "bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, double p, ::std::optional generator); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, double p, ::std::optional generator); +}; + +struct TORCH_API bernoulli_p { + using schema = at::Tensor (const at::Tensor &, double, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "p"; + static constexpr const char* schema_str = "bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, double p, ::std::optional generator); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double p, ::std::optional generator); +}; + +struct TORCH_API bernoulli_Tensor_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "Tensor_out"; + static constexpr const char* schema_str = "bernoulli.Tensor_out(Tensor self, Tensor p, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & p, ::std::optional generator, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & p, ::std::optional generator, at::Tensor & out); +}; + +struct TORCH_API bernoulli_Tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "bernoulli.Tensor(Tensor self, Tensor p, *, Generator? generator=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & p, ::std::optional generator); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & p, ::std::optional generator); +}; + +struct TORCH_API bernoulli_float_out { + using schema = at::Tensor & (const at::Tensor &, double, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "float_out"; + static constexpr const char* schema_str = "bernoulli.float_out(Tensor self, float p=0.5, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, double p, ::std::optional generator, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double p, ::std::optional generator, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bilinear.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bilinear.h new file mode 100644 index 0000000000000000000000000000000000000000..1aad4a5dc864b9ebda94bc0108a89a1a051fac9d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bilinear.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor +inline at::Tensor bilinear(const at::Tensor & input1, const at::Tensor & input2, const at::Tensor & weight, const ::std::optional & bias={}) { + return at::_ops::bilinear::call(input1, input2, weight, bias); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/binary_cross_entropy_with_logits_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/binary_cross_entropy_with_logits_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9680788849be6c5e5aeb366a2dcec455977016a3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/binary_cross_entropy_with_logits_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor binary_cross_entropy_with_logits(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight={}, const ::std::optional & pos_weight={}, int64_t reduction=at::Reduction::Mean); +TORCH_API at::Tensor & binary_cross_entropy_with_logits_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight={}, const ::std::optional & pos_weight={}, int64_t reduction=at::Reduction::Mean); +TORCH_API at::Tensor & binary_cross_entropy_with_logits_outf(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, const ::std::optional & pos_weight, int64_t reduction, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bincount.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bincount.h new file mode 100644 index 0000000000000000000000000000000000000000..d6a55bd8de4fa3be1f9eae37a638882c46d4bb7c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bincount.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor bincount(const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); +} +namespace symint { + template >> + at::Tensor bincount(const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); + } +} + +// aten::bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor bincount_symint(const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); +} +namespace symint { + template >> + at::Tensor bincount(const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bincount_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bincount_outf(const at::Tensor & self, const ::std::optional & weights, int64_t minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_outf(const at::Tensor & self, const ::std::optional & weights, int64_t minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bincount_symint_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bincount_symint_outf(const at::Tensor & self, const ::std::optional & weights, c10::SymInt minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_outf(const at::Tensor & self, const ::std::optional & weights, c10::SymInt minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6aafa2a84e2ca9e08d17050925451da305024016 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautograd_dispatch.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor bitwise_and(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_and_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_and_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & bitwise_and_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor bitwise_and(const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_and_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_and_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..865a5211af86c624d18da94f5361e24060308320 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor bitwise_and(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_and_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_and_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & bitwise_and_(at::Tensor & self, const at::Tensor & other); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_left_shift_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_left_shift_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..90e67408a6354a3d2b770a04e369daaa47bdea32 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_left_shift_compositeexplicitautograd_dispatch.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor bitwise_left_shift(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_left_shift_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_left_shift_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & bitwise_left_shift_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor bitwise_left_shift(const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_left_shift_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_left_shift_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor.h new file mode 100644 index 0000000000000000000000000000000000000000..57f59aacee5d6a2b1fc7de025cf78de8177095a5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor.h @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::bitwise_xor.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bitwise_xor_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::bitwise_xor_Tensor_out::call(self, other, out); +} +// aten::bitwise_xor.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bitwise_xor_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::bitwise_xor_Tensor_out::call(self, other, out); +} + +// aten::bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bitwise_xor_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other) { + return at::_ops::bitwise_xor_Scalar_out::call(self, other, out); +} +// aten::bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bitwise_xor_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out) { + return at::_ops::bitwise_xor_Scalar_out::call(self, other, out); +} + +// aten::bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor bitwise_xor(const at::Tensor & self, const at::Scalar & other) { + return at::_ops::bitwise_xor_Scalar::call(self, other); +} + +// aten::bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor +inline at::Tensor bitwise_xor(const at::Scalar & self, const at::Tensor & other) { + return at::_ops::bitwise_xor_Scalar_Tensor::call(self, other); +} + +// aten::bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor bitwise_xor(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::bitwise_xor_Tensor::call(self, other); +} + +// aten::bitwise_xor.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bitwise_xor_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other) { + return at::_ops::bitwise_xor_Scalar_Tensor_out::call(self, other, out); +} +// aten::bitwise_xor.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bitwise_xor_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::bitwise_xor_Scalar_Tensor_out::call(self, other, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f26513a64fc2c97193efcdddaec6ea3046aed447 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor_compositeexplicitautograd_dispatch.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor bitwise_xor(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_xor_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_xor_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & bitwise_xor_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor bitwise_xor(const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_xor_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_xor_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/blackman_window.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/blackman_window.h new file mode 100644 index 0000000000000000000000000000000000000000..c05d73eaf8b05699698dd6b73f14ec2ea93bafbc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/blackman_window.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, at::TensorOptions options={}) { + return at::_ops::blackman_window::call(window_length, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::blackman_window::call(window_length, dtype, layout, device, pin_memory); +} + +// aten::blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, bool periodic, at::TensorOptions options={}) { + return at::_ops::blackman_window_periodic::call(window_length, periodic, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::blackman_window_periodic::call(window_length, periodic, dtype, layout, device, pin_memory); +} + +// aten::blackman_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_out(at::Tensor & out, int64_t window_length) { + return at::_ops::blackman_window_out::call(window_length, out); +} +// aten::blackman_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_outf(int64_t window_length, at::Tensor & out) { + return at::_ops::blackman_window_out::call(window_length, out); +} + +// aten::blackman_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_out(at::Tensor & out, int64_t window_length, bool periodic) { + return at::_ops::blackman_window_periodic_out::call(window_length, periodic, out); +} +// aten::blackman_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_outf(int64_t window_length, bool periodic, at::Tensor & out) { + return at::_ops::blackman_window_periodic_out::call(window_length, periodic, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/block_diag_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/block_diag_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2c1c2778444d8f95a42effcfff082da9a02a6f8a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/block_diag_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor block_diag(at::TensorList tensors); +TORCH_API at::Tensor & block_diag_out(at::Tensor & out, at::TensorList tensors); +TORCH_API at::Tensor & block_diag_outf(at::TensorList tensors, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bmm_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bmm_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..28c9ddcd07faf396d7dfdc19cc43193ac947efd8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/bmm_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_bmm : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, const at::Tensor & mat2); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/can_cast_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/can_cast_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c15018bee82614f8a69ee57b81b378b3cc84b4d6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/can_cast_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API can_cast { + using schema = bool (at::ScalarType, at::ScalarType); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::can_cast"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "can_cast(ScalarType from_, ScalarType to) -> bool"; + static bool call(at::ScalarType from_, at::ScalarType to); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, at::ScalarType from_, at::ScalarType to); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cat_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cat_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ad6f373fd8772e178b02974ef1939a1b4766c70a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cat_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor cat(const at::ITensorListRef & tensors, int64_t dim=0); +TORCH_API at::Tensor & cat_out(at::Tensor & out, const at::ITensorListRef & tensors, int64_t dim=0); +TORCH_API at::Tensor & cat_outf(const at::ITensorListRef & tensors, int64_t dim, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cauchy_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cauchy_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fc00c7b9617daed326ae4a730d609793e7d61d4d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cauchy_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor & cauchy_(at::Tensor & self, double median=0, double sigma=1, ::std::optional generator=::std::nullopt); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cauchy_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cauchy_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8cfb210b5332c3000bb830f4ad54d6b29995a387 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cauchy_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & cauchy_(at::Tensor & self, double median=0, double sigma=1, ::std::optional generator=::std::nullopt); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..ef1d9199bcd95f9496e7aa1788fc6011e655f8e9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices_copy_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices_copy_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..44d4bff72cb410ac2221f8e450f119edb27b902f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices_copy_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor ccol_indices_copy(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ceil_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ceil_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2ad8dbb761281160514b9acce65ce4abb8a1bd50 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ceil_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor ceil(const at::Tensor & self); +TORCH_API at::Tensor & ceil_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & ceil_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & ceil_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c49fe7997cffa7d47c0f28d56d166ef363c7d16d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API cholesky_out { + using schema = at::Tensor & (const at::Tensor &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cholesky"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, bool upper, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool upper, at::Tensor & out); +}; + +struct TORCH_API cholesky { + using schema = at::Tensor (const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cholesky"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "cholesky(Tensor self, bool upper=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, bool upper); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool upper); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_solve_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_solve_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ce6be074f41cc977e39180b73bdbce673cdb5bb8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_solve_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor cholesky_solve(const at::Tensor & self, const at::Tensor & input2, bool upper=false); +TORCH_API at::Tensor & cholesky_solve_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & input2, bool upper=false); +TORCH_API at::Tensor & cholesky_solve_outf(const at::Tensor & self, const at::Tensor & input2, bool upper, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_solve_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_solve_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..019e0f4ac062508f909dc2924348fbe91e834309 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cholesky_solve_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API cholesky_solve_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cholesky_solve"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & input2, bool upper, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & input2, bool upper, at::Tensor & out); +}; + +struct TORCH_API cholesky_solve { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cholesky_solve"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & input2, bool upper); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & input2, bool upper); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/choose_qparams_optimized_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/choose_qparams_optimized_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..1a84d4e45d4f78a5325da0156fdb187f1da3122b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/choose_qparams_optimized_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API choose_qparams_optimized { + using schema = ::std::tuple (const at::Tensor &, int64_t, int64_t, double, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::choose_qparams_optimized"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, int64_t numel, int64_t n_bins, double ratio, int64_t bit_width); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, int64_t numel, int64_t n_bins, double ratio, int64_t bit_width); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..871565fd8a43c1696342cd588f08c1a1c671fc4d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_cuda_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Scalar & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Tensor & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Tensor & max); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..647351258de189b623d61af30f7d5524a9c703c4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_meta_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Scalar & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Tensor & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Tensor & max); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1d84e61e5a0cf811c564d1b5b151a3dbec5d4ba4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_cpu_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor clamp_min(const at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor & clamp_min_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor & clamp_min_outf(const at::Tensor & self, const at::Scalar & min, at::Tensor & out); +TORCH_API at::Tensor & clamp_min_(at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor clamp_min(const at::Tensor & self, const at::Tensor & min); +TORCH_API at::Tensor & clamp_min_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & min); +TORCH_API at::Tensor & clamp_min_outf(const at::Tensor & self, const at::Tensor & min, at::Tensor & out); +TORCH_API at::Tensor & clamp_min_(at::Tensor & self, const at::Tensor & min); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d661d29f41150246ca0d50517c37a5789aca5b8a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_meta_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor clamp_min(const at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor & clamp_min_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor & clamp_min_outf(const at::Tensor & self, const at::Scalar & min, at::Tensor & out); +TORCH_API at::Tensor & clamp_min_(at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor clamp_min(const at::Tensor & self, const at::Tensor & min); +TORCH_API at::Tensor & clamp_min_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & min); +TORCH_API at::Tensor & clamp_min_outf(const at::Tensor & self, const at::Tensor & min, at::Tensor & out); +TORCH_API at::Tensor & clamp_min_(at::Tensor & self, const at::Tensor & min); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/col2im_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/col2im_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..448c94998ec59e3bba8819d0a8cdc3829387da40 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/col2im_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API col2im_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::col2im"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "col2im.out(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out); +}; + +struct TORCH_API col2im { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::col2im"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/column_stack_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/column_stack_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d64ee54eb857c591507b11d083b455915fd9e77d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/column_stack_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor column_stack(at::TensorList tensors); +TORCH_API at::Tensor & column_stack_out(at::Tensor & out, at::TensorList tensors); +TORCH_API at::Tensor & column_stack_outf(at::TensorList tensors, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/concatenate.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/concatenate.h new file mode 100644 index 0000000000000000000000000000000000000000..0866f3b02b10f75b9af86bc23a7aad9cfefb9655 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/concatenate.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::concatenate(Tensor[] tensors, int dim=0) -> Tensor +inline at::Tensor concatenate(at::TensorList tensors, int64_t dim=0) { + return at::_ops::concatenate::call(tensors, dim); +} + +// aten::concatenate.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & concatenate_out(at::Tensor & out, at::TensorList tensors, int64_t dim=0) { + return at::_ops::concatenate_out::call(tensors, dim, out); +} +// aten::concatenate.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & concatenate_outf(at::TensorList tensors, int64_t dim, at::Tensor & out) { + return at::_ops::concatenate_out::call(tensors, dim, out); +} + +// aten::concatenate.names(Tensor[] tensors, Dimname dim) -> Tensor +inline at::Tensor concatenate(at::TensorList tensors, at::Dimname dim) { + return at::_ops::concatenate_names::call(tensors, dim); +} + +// aten::concatenate.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & concatenate_out(at::Tensor & out, at::TensorList tensors, at::Dimname dim) { + return at::_ops::concatenate_names_out::call(tensors, dim, out); +} +// aten::concatenate.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & concatenate_outf(at::TensorList tensors, at::Dimname dim, at::Tensor & out) { + return at::_ops::concatenate_names_out::call(tensors, dim, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conj_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conj_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ef26c9bd3f294fc59949091a8abdab907cb15dbb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conj_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor conj(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conj_physical.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conj_physical.h new file mode 100644 index 0000000000000000000000000000000000000000..33c196fb5826fdf8e12197557d3d8361540a78f6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conj_physical.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::conj_physical(Tensor self) -> Tensor +inline at::Tensor conj_physical(const at::Tensor & self) { + return at::_ops::conj_physical::call(self); +} + +// aten::conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & conj_physical_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::conj_physical_out::call(self, out); +} +// aten::conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & conj_physical_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::conj_physical_out::call(self, out); +} + +// aten::conj_physical_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & conj_physical_(at::Tensor & self) { + return at::_ops::conj_physical_::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv2d_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv2d_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..903e8c1a3a31a309d8acf5c61177e45b4e8038ac --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv2d_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API conv2d { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::conv2d"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); +}; + +struct TORCH_API conv2d_padding { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::string_view, c10::SymIntArrayRef, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::conv2d"; + static constexpr const char* overload_name = "padding"; + static constexpr const char* schema_str = "conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding=\"valid\", SymInt[2] dilation=1, SymInt groups=1) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_depthwise3d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_depthwise3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..bac6b681b6f2487bb703b879fc78c8c3c4a3d13f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_depthwise3d_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & conv_depthwise3d_out_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, at::Tensor & out); +TORCH_API at::Tensor conv_depthwise3d_cuda(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d3d06535771b0bd95ad80fcdf03d2edf525965c6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple conv_tbc_backward(const at::Tensor & self, const at::Tensor & input, const at::Tensor & weight, const at::Tensor & bias, int64_t pad); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..381b8a6d657baaa9d4c4fdd1a7278c5e85e240a8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple conv_tbc_backward(const at::Tensor & self, const at::Tensor & input, const at::Tensor & weight, const at::Tensor & bias, int64_t pad); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_native.h new file mode 100644 index 0000000000000000000000000000000000000000..cdbcba45eb98db013b8eda0ee0e53097d025d8d8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor conv_tbc(const at::Tensor & self, const at::Tensor & weight, const at::Tensor & bias, int64_t pad=0); +TORCH_API at::Tensor & conv_tbc_out(const at::Tensor & self, const at::Tensor & weight, const at::Tensor & bias, int64_t pad, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_transpose3d_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_transpose3d_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..60ca0e61a455a83e54c3b307981471cba5f2c1c1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/conv_transpose3d_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor conv_transpose3d(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias={}, at::IntArrayRef stride=1, at::IntArrayRef padding=0, at::IntArrayRef output_padding=0, int64_t groups=1, at::IntArrayRef dilation=1); +TORCH_API at::Tensor conv_transpose3d_symint(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias={}, c10::SymIntArrayRef stride=c10::SymInt(1), c10::SymIntArrayRef padding=c10::SymInt(0), c10::SymIntArrayRef output_padding=c10::SymInt(0), c10::SymInt groups=1, c10::SymIntArrayRef dilation=c10::SymInt(1)); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/convolution_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/convolution_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..134b936ef05a0cb9abd6339723d70ac144f08e3c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/convolution_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API convolution { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, c10::SymIntArrayRef, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::convolution"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); +}; + +struct TORCH_API convolution_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, c10::SymIntArrayRef, c10::SymInt, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::convolution"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copy_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copy_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e228444ecd90c6f91660e1386b5f8d01f5e25aca --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copy_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & copy_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & src, bool non_blocking=false); +TORCH_API at::Tensor & copy_outf(const at::Tensor & self, const at::Tensor & src, bool non_blocking, at::Tensor & out); +TORCH_API at::Tensor & copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking=false); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign.h new file mode 100644 index 0000000000000000000000000000000000000000..263c3a531525c24cf6026a55d65e1368208a6459 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & copysign_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::copysign_out::call(self, other, out); +} +// aten::copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & copysign_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::copysign_out::call(self, other, out); +} + +// aten::copysign.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor copysign(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::copysign_Tensor::call(self, other); +} + +// aten::copysign.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor copysign(const at::Tensor & self, const at::Scalar & other) { + return at::_ops::copysign_Scalar::call(self, other); +} + +// aten::copysign.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & copysign_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other) { + return at::_ops::copysign_Scalar_out::call(self, other, out); +} +// aten::copysign.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & copysign_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out) { + return at::_ops::copysign_Scalar_out::call(self, other, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b0bbec31ce27311329a7be9bf1f8b14f417289f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor copysign(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & copysign_(at::Tensor & self, const at::Tensor & other); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..780413f313451ff7cb8dd77fe7f1106af50b3b47 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/copysign_cuda_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor copysign(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & copysign_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & copysign_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & copysign_(at::Tensor & self, const at::Tensor & other); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/corrcoef_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/corrcoef_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3630c3342545a5af0aa3d9f69219d69dfa0b3c2f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/corrcoef_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor corrcoef(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cos.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cos.h new file mode 100644 index 0000000000000000000000000000000000000000..e3b57b39b0a4aa3713cdbba84256fabcb7ea76b3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cos.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::cos(Tensor self) -> Tensor +inline at::Tensor cos(const at::Tensor & self) { + return at::_ops::cos::call(self); +} + +// aten::cos_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & cos_(at::Tensor & self) { + return at::_ops::cos_::call(self); +} + +// aten::cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & cos_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::cos_out::call(self, out); +} +// aten::cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & cos_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::cos_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cosh_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cosh_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..37a962fd77a064e9584857d9f82e9a748b90e38b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cosh_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor cosh(const at::Tensor & self); +TORCH_API at::Tensor & cosh_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ctc_loss.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ctc_loss.h new file mode 100644 index 0000000000000000000000000000000000000000..ebd7dfdca74b50d8b17a5a66da99e39788464f5e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ctc_loss.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::ctc_loss.IntList(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor +inline at::Tensor ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank=0, int64_t reduction=at::Reduction::Mean, bool zero_infinity=false) { + return at::_ops::ctc_loss_IntList::call(log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity); +} + +// aten::ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor +inline at::Tensor ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, const at::Tensor & input_lengths, const at::Tensor & target_lengths, int64_t blank=0, int64_t reduction=at::Reduction::Mean, bool zero_infinity=false) { + return at::_ops::ctc_loss_Tensor::call(log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_affine_grid_generator_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_affine_grid_generator_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..4ca9576132b56673ef462af6846f058dd766212f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_affine_grid_generator_backward.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::cudnn_affine_grid_generator_backward(Tensor grad, int N, int C, int H, int W) -> Tensor grad_theta +inline at::Tensor cudnn_affine_grid_generator_backward(const at::Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W) { + return at::_ops::cudnn_affine_grid_generator_backward::call(grad, N, C, H, W); +} + +// aten::cudnn_affine_grid_generator_backward.out(Tensor grad, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & cudnn_affine_grid_generator_backward_out(at::Tensor & out, const at::Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W) { + return at::_ops::cudnn_affine_grid_generator_backward_out::call(grad, N, C, H, W, out); +} +// aten::cudnn_affine_grid_generator_backward.out(Tensor grad, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & cudnn_affine_grid_generator_backward_outf(const at::Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W, at::Tensor & out) { + return at::_ops::cudnn_affine_grid_generator_backward_out::call(grad, N, C, H, W, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_convolution_relu_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_convolution_relu_native.h new file mode 100644 index 0000000000000000000000000000000000000000..acfcf35b35e7158a64048ef756d3b29b6bef65e2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_convolution_relu_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & cudnn_convolution_relu_out_symint(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups, at::Tensor & out); +TORCH_API at::Tensor cudnn_convolution_relu(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, int64_t groups); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable.h new file mode 100644 index 0000000000000000000000000000000000000000..c3f3994541ca499a9b817efe9b92a849e7765ddc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::cudnn_is_acceptable(Tensor self) -> bool +inline bool cudnn_is_acceptable(const at::Tensor & self) { + return at::_ops::cudnn_is_acceptable::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..7c500c0e015fb04f0bbc470a8efa363be0ccdee6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API cudnn_is_acceptable { + using schema = bool (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cudnn_is_acceptable"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "cudnn_is_acceptable(Tensor self) -> bool"; + static bool call(const at::Tensor & self); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummaxmin_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummaxmin_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9fcdafa687834123962c728e217f21fe416ecf5f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummaxmin_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API cummaxmin_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cummaxmin_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & input, const at::Tensor & indices, int64_t dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & input, const at::Tensor & indices, int64_t dim); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7e9d7a60c670bd31c0f818a69de5816daf539dee --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple cummin(const at::Tensor & self, int64_t dim); +TORCH_API ::std::tuple cummin_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t dim); +TORCH_API ::std::tuple cummin_outf(const at::Tensor & self, int64_t dim, at::Tensor & values, at::Tensor & indices); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..904d5cddde50aea3cd9166f3bf852103d8ddd02a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_ops.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API cummin { + using schema = ::std::tuple (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cummin"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)"; + static ::std::tuple call(const at::Tensor & self, int64_t dim); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim); +}; + +struct TORCH_API cummin_out { + using schema = ::std::tuple (const at::Tensor &, int64_t, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cummin"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "cummin.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)"; + static ::std::tuple call(const at::Tensor & self, int64_t dim, at::Tensor & values, at::Tensor & indices); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, at::Tensor & values, at::Tensor & indices); +}; + +struct TORCH_API cummin_dimname { + using schema = ::std::tuple (const at::Tensor &, at::Dimname); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cummin"; + static constexpr const char* overload_name = "dimname"; + static constexpr const char* schema_str = "cummin.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)"; + static ::std::tuple call(const at::Tensor & self, at::Dimname dim); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim); +}; + +struct TORCH_API cummin_dimname_out { + using schema = ::std::tuple (const at::Tensor &, at::Dimname, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cummin"; + static constexpr const char* overload_name = "dimname_out"; + static constexpr const char* schema_str = "cummin.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)"; + static ::std::tuple call(const at::Tensor & self, at::Dimname dim, at::Tensor & values, at::Tensor & indices); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, at::Tensor & values, at::Tensor & indices); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cumprod_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cumprod_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f953894baab92cd6a147dc70a98bc0f87b994ba7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cumprod_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_cumprod_out : public at::meta::structured_cumprod { +void impl(const at::Tensor & self, int64_t dim, ::std::optional dtype, const at::Tensor & out); +}; +TORCH_API at::Tensor cumprod(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & cumprod_out(const at::Tensor & self, at::Dimname dim, ::std::optional dtype, at::Tensor & out); +TORCH_API at::Tensor & cumprod_(at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cumulative_trapezoid.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cumulative_trapezoid.h new file mode 100644 index 0000000000000000000000000000000000000000..f38375efd314b7ff726623b91e25b5730466b6eb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/cumulative_trapezoid.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::cumulative_trapezoid.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor +inline at::Tensor cumulative_trapezoid(const at::Tensor & y, const at::Tensor & x, int64_t dim=-1) { + return at::_ops::cumulative_trapezoid_x::call(y, x, dim); +} + +// aten::cumulative_trapezoid.dx(Tensor y, *, Scalar dx=1, int dim=-1) -> Tensor +inline at::Tensor cumulative_trapezoid(const at::Tensor & y, const at::Scalar & dx=1, int64_t dim=-1) { + return at::_ops::cumulative_trapezoid_dx::call(y, dx, dim); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/diagonal_copy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/diagonal_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..42a08ed1c65b7364488dd0e7acb6ceb2b0ced8a2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/diagonal_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API diagonal_copy { + using schema = at::Tensor (const at::Tensor &, int64_t, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::diagonal_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "diagonal_copy(Tensor self, int offset=0, int dim1=0, int dim2=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); +}; + +struct TORCH_API diagonal_copy_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, int64_t, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::diagonal_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "diagonal_copy.out(Tensor self, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/div_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/div_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..144dc620cb7d2121a4fa09f71b349906c4194706 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/div_cpu_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor div(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & div_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & div_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & div_(at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor div(const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); +TORCH_API at::Tensor & div_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); +TORCH_API at::Tensor & div_outf(const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode, at::Tensor & out); +TORCH_API at::Tensor & div_(at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/dot_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/dot_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3cee09ff5f2ae66a6ec056227f02b0a979ae8de8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/dot_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & dot_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & tensor); +TORCH_API at::Tensor & dot_outf(const at::Tensor & self, const at::Tensor & tensor, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/dropout_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/dropout_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f18f4fea07d0c4eea157a299bf46b954e0a85405 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/dropout_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor dropout(const at::Tensor & input, double p, bool train); +TORCH_API at::Tensor & dropout_(at::Tensor & self, double p, bool train); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/empty_permuted.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/empty_permuted.h new file mode 100644 index 0000000000000000000000000000000000000000..41e1f9c6adcc12c2892b07e6b8ccd99ed67aad58 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/empty_permuted.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_permuted(at::IntArrayRef size, at::IntArrayRef physical_layout, at::TensorOptions options={}) { + return at::_ops::empty_permuted::call(c10::fromIntArrayRefSlow(size), physical_layout, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor empty_permuted(at::IntArrayRef size, at::IntArrayRef physical_layout, at::TensorOptions options={}) { + return at::_ops::empty_permuted::call(c10::fromIntArrayRefSlow(size), physical_layout, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_permuted(at::IntArrayRef size, at::IntArrayRef physical_layout, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_permuted::call(c10::fromIntArrayRefSlow(size), physical_layout, dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor empty_permuted(at::IntArrayRef size, at::IntArrayRef physical_layout, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_permuted::call(c10::fromIntArrayRefSlow(size), physical_layout, dtype, layout, device, pin_memory); + } +} + +// aten::empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_permuted_symint(c10::SymIntArrayRef size, at::IntArrayRef physical_layout, at::TensorOptions options={}) { + return at::_ops::empty_permuted::call(size, physical_layout, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor empty_permuted(c10::SymIntArrayRef size, at::IntArrayRef physical_layout, at::TensorOptions options={}) { + return at::_ops::empty_permuted::call(size, physical_layout, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_permuted_symint(c10::SymIntArrayRef size, at::IntArrayRef physical_layout, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_permuted::call(size, physical_layout, dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor empty_permuted(c10::SymIntArrayRef size, at::IntArrayRef physical_layout, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_permuted::call(size, physical_layout, dtype, layout, device, pin_memory); + } +} + +// aten::empty_permuted.out(SymInt[] size, int[] physical_layout, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_permuted_out(at::Tensor & out, at::IntArrayRef size, at::IntArrayRef physical_layout) { + return at::_ops::empty_permuted_out::call(c10::fromIntArrayRefSlow(size), physical_layout, out); +} +namespace symint { + template >> + at::Tensor & empty_permuted_out(at::Tensor & out, at::IntArrayRef size, at::IntArrayRef physical_layout) { + return at::_ops::empty_permuted_out::call(c10::fromIntArrayRefSlow(size), physical_layout, out); + } +} + +// aten::empty_permuted.out(SymInt[] size, int[] physical_layout, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_permuted_outf(at::IntArrayRef size, at::IntArrayRef physical_layout, at::Tensor & out) { + return at::_ops::empty_permuted_out::call(c10::fromIntArrayRefSlow(size), physical_layout, out); +} +namespace symint { + template >> + at::Tensor & empty_permuted_outf(at::IntArrayRef size, at::IntArrayRef physical_layout, at::Tensor & out) { + return at::_ops::empty_permuted_out::call(c10::fromIntArrayRefSlow(size), physical_layout, out); + } +} + +// aten::empty_permuted.out(SymInt[] size, int[] physical_layout, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_permuted_symint_out(at::Tensor & out, c10::SymIntArrayRef size, at::IntArrayRef physical_layout) { + return at::_ops::empty_permuted_out::call(size, physical_layout, out); +} +namespace symint { + template >> + at::Tensor & empty_permuted_out(at::Tensor & out, c10::SymIntArrayRef size, at::IntArrayRef physical_layout) { + return at::_ops::empty_permuted_out::call(size, physical_layout, out); + } +} + +// aten::empty_permuted.out(SymInt[] size, int[] physical_layout, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_permuted_symint_outf(c10::SymIntArrayRef size, at::IntArrayRef physical_layout, at::Tensor & out) { + return at::_ops::empty_permuted_out::call(size, physical_layout, out); +} +namespace symint { + template >> + at::Tensor & empty_permuted_outf(c10::SymIntArrayRef size, at::IntArrayRef physical_layout, at::Tensor & out) { + return at::_ops::empty_permuted_out::call(size, physical_layout, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/empty_strided.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/empty_strided.h new file mode 100644 index 0000000000000000000000000000000000000000..f1ba8750379010822227f402b86b923b96ff25ad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/empty_strided.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype, layout, device, pin_memory); + } +} + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(size, stride, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor empty_strided(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(size, stride, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(size, stride, dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor empty_strided(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(size, stride, dtype, layout, device, pin_memory); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_strided_out(at::Tensor & out, at::IntArrayRef size, at::IntArrayRef stride) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); +} +namespace symint { + template >> + at::Tensor & empty_strided_out(at::Tensor & out, at::IntArrayRef size, at::IntArrayRef stride) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_strided_outf(at::IntArrayRef size, at::IntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); +} +namespace symint { + template >> + at::Tensor & empty_strided_outf(at::IntArrayRef size, at::IntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_strided_symint_out(at::Tensor & out, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) { + return at::_ops::empty_strided_out::call(size, stride, out); +} +namespace symint { + template >> + at::Tensor & empty_strided_out(at::Tensor & out, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) { + return at::_ops::empty_strided_out::call(size, stride, out); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_strided_symint_outf(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(size, stride, out); +} +namespace symint { + template >> + at::Tensor & empty_strided_outf(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(size, stride, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erf.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erf.h new file mode 100644 index 0000000000000000000000000000000000000000..55c70e5dc1a40608b648f6262de58833c8936dd3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erf.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::erf(Tensor self) -> Tensor +inline at::Tensor erf(const at::Tensor & self) { + return at::_ops::erf::call(self); +} + +// aten::erf_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & erf_(at::Tensor & self) { + return at::_ops::erf_::call(self); +} + +// aten::erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & erf_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::erf_out::call(self, out); +} +// aten::erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & erf_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::erf_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erf_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erf_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3b771451bfac25a8aaef767d0e826818d12ca2ff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erf_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor erf(const at::Tensor & self); +TORCH_API at::Tensor & erf_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & erf_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & erf_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erfc.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erfc.h new file mode 100644 index 0000000000000000000000000000000000000000..9adad39bd275f3c56e168edfa9878a492ef2655e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/erfc.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::erfc(Tensor self) -> Tensor +inline at::Tensor erfc(const at::Tensor & self) { + return at::_ops::erfc::call(self); +} + +// aten::erfc_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & erfc_(at::Tensor & self) { + return at::_ops::erfc_::call(self); +} + +// aten::erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & erfc_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::erfc_out::call(self, out); +} +// aten::erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & erfc_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::erfc_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b8055313fe154d751b0c352f5aa41ca7db459f3b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor exp2(const at::Tensor & self); +TORCH_API at::Tensor & exp2_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..d8237b7584541cf7a47d7a000978ed47f43fbdb4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_exp2 : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ef99b4ae864e7d7675d720ffe2bfc21a34615961 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/exp_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor exp(const at::Tensor & self); +TORCH_API at::Tensor & exp_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_as.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_as.h new file mode 100644 index 0000000000000000000000000000000000000000..7f53733878e86a0637cde422b42bc9c55e747b08 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_as.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_copy.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..aad210234e89373f36f9d5a757ebc83ed0a51fb4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_copy.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::expand_copy(Tensor self, SymInt[] size, *, bool implicit=False) -> Tensor +inline at::Tensor expand_copy(const at::Tensor & self, at::IntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy::call(self, c10::fromIntArrayRefSlow(size), implicit); +} +namespace symint { + template >> + at::Tensor expand_copy(const at::Tensor & self, at::IntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy::call(self, c10::fromIntArrayRefSlow(size), implicit); + } +} + +// aten::expand_copy(Tensor self, SymInt[] size, *, bool implicit=False) -> Tensor +inline at::Tensor expand_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy::call(self, size, implicit); +} +namespace symint { + template >> + at::Tensor expand_copy(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy::call(self, size, implicit); + } +} + +// aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & expand_copy_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy_out::call(self, c10::fromIntArrayRefSlow(size), implicit, out); +} +namespace symint { + template >> + at::Tensor & expand_copy_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy_out::call(self, c10::fromIntArrayRefSlow(size), implicit, out); + } +} + +// aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & expand_copy_outf(const at::Tensor & self, at::IntArrayRef size, bool implicit, at::Tensor & out) { + return at::_ops::expand_copy_out::call(self, c10::fromIntArrayRefSlow(size), implicit, out); +} +namespace symint { + template >> + at::Tensor & expand_copy_outf(const at::Tensor & self, at::IntArrayRef size, bool implicit, at::Tensor & out) { + return at::_ops::expand_copy_out::call(self, c10::fromIntArrayRefSlow(size), implicit, out); + } +} + +// aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & expand_copy_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy_out::call(self, size, implicit, out); +} +namespace symint { + template >> + at::Tensor & expand_copy_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size, bool implicit=false) { + return at::_ops::expand_copy_out::call(self, size, implicit, out); + } +} + +// aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & expand_copy_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit, at::Tensor & out) { + return at::_ops::expand_copy_out::call(self, size, implicit, out); +} +namespace symint { + template >> + at::Tensor & expand_copy_outf(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit, at::Tensor & out) { + return at::_ops::expand_copy_out::call(self, size, implicit, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_native.h new file mode 100644 index 0000000000000000000000000000000000000000..21f799764ba3d31bcc9af5b9e2f81f6fbc6b808d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expand_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor expand(const at::Tensor & self, at::IntArrayRef size, bool implicit=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expm1_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expm1_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..26e8243c0cf28d280c524473134d6635cf491a68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/expm1_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor expm1(const at::Tensor & self); +TORCH_API at::Tensor & expm1_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine.h new file mode 100644 index 0000000000000000000000000000000000000000..e79ee036d31599769800fcae3693ca3e1504bbe3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> Tensor +inline at::Tensor fake_quantize_per_channel_affine(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max) { + return at::_ops::fake_quantize_per_channel_affine::call(self, scale, zero_point, axis, quant_min, quant_max); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..f1b5e62650eb2d006d436392a10e0129720b8910 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fake_quantize_per_channel_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor +inline at::Tensor fake_quantize_per_channel_affine_cachemask_backward(const at::Tensor & grad, const at::Tensor & mask) { + return at::_ops::fake_quantize_per_channel_affine_cachemask_backward::call(grad, mask); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..4cde7f2bfc71719a1f6fe7f268c9cc66fef6d549 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fake_quantize_per_channel_affine_cachemask_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fake_quantize_per_channel_affine_cachemask_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fake_quantize_per_channel_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & mask); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & mask); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_tensor_affine_cachemask_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_tensor_affine_cachemask_native.h new file mode 100644 index 0000000000000000000000000000000000000000..96be82bca4b4f0bc49b3960279def1286443cf55 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_tensor_affine_cachemask_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple fake_quantize_per_tensor_affine_cachemask_out(const at::Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max, at::Tensor & out0, at::Tensor & out1); +TORCH_API ::std::tuple fake_quantize_per_tensor_affine_cachemask(const at::Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight.h new file mode 100644 index 0000000000000000000000000000000000000000..6e381fa3b9b06b6796e09186f55367aa9861755f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor +inline at::Tensor fbgemm_linear_fp16_weight(const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias) { + return at::_ops::fbgemm_linear_fp16_weight::call(input, packed_weight, bias); +} + +// aten::fbgemm_linear_fp16_weight.out(Tensor input, Tensor packed_weight, Tensor bias, Tensor(a!) output) -> Tensor +inline at::Tensor fbgemm_linear_fp16_weight(const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias, at::Tensor & output) { + return at::_ops::fbgemm_linear_fp16_weight_out::call(input, packed_weight, bias, output); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6cf2bab0c511150895282cb4177c470850bc15c1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fbgemm_linear_fp16_weight { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fbgemm_linear_fp16_weight"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias); +}; + +struct TORCH_API fbgemm_linear_fp16_weight_out { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fbgemm_linear_fp16_weight"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "fbgemm_linear_fp16_weight.out(Tensor input, Tensor packed_weight, Tensor bias, Tensor(a!) output) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias, at::Tensor & output); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias, at::Tensor & output); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_quantize_weight_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_quantize_weight_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6c9fcfa51b0eb1ea590a76a21f56af2cc68c99aa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_quantize_weight_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple fbgemm_linear_quantize_weight(const at::Tensor & input); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16.h new file mode 100644 index 0000000000000000000000000000000000000000..bad86f420339b70d1e22ec7c959028a42859ab86 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor +inline at::Tensor fbgemm_pack_gemm_matrix_fp16(const at::Tensor & input) { + return at::_ops::fbgemm_pack_gemm_matrix_fp16::call(input); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1fa34be058324ab99dce88cd79e645cbbeff5a04 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fbgemm_pack_gemm_matrix_fp16(const at::Tensor & input); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/feature_alpha_dropout.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/feature_alpha_dropout.h new file mode 100644 index 0000000000000000000000000000000000000000..58de921acd450a02f7e9039adf7386fb49de53c0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/feature_alpha_dropout.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor +inline at::Tensor feature_alpha_dropout(const at::Tensor & input, double p, bool train) { + return at::_ops::feature_alpha_dropout::call(input, p, train); +} + +// aten::feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!) +inline at::Tensor & feature_alpha_dropout_(at::Tensor & self, double p, bool train) { + return at::_ops::feature_alpha_dropout_::call(self, p, train); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftn_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftn_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a673ac4d432654908dc92b214d1aa5309ccae148 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftn_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_fftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::OptionalIntArrayRef dim=::std::nullopt, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_fftn_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, ::std::optional norm, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftshift_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftshift_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1d24b9ed4c49755e0fc3fc3032f5200e5e102f5d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftshift_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor fft_fftshift(const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2.h new file mode 100644 index 0000000000000000000000000000000000000000..17d27d9997a34c3299263fbb5aec53ac6602c2fc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fft_hfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor +inline at::Tensor fft_hfft2(const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2::call(self, s.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*s)) : ::std::nullopt, dim, norm); +} +namespace symint { + template >> + at::Tensor fft_hfft2(const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2::call(self, s.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*s)) : ::std::nullopt, dim, norm); + } +} + +// aten::fft_hfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor +inline at::Tensor fft_hfft2_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2::call(self, s, dim, norm); +} +namespace symint { + template >> + at::Tensor fft_hfft2(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2::call(self, s, dim, norm); + } +} + +// aten::fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fft_hfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2_out::call(self, s.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*s)) : ::std::nullopt, dim, norm, out); +} +namespace symint { + template >> + at::Tensor & fft_hfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2_out::call(self, s.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*s)) : ::std::nullopt, dim, norm, out); + } +} + +// aten::fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fft_hfft2_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft2_out::call(self, s.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*s)) : ::std::nullopt, dim, norm, out); +} +namespace symint { + template >> + at::Tensor & fft_hfft2_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft2_out::call(self, s.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*s)) : ::std::nullopt, dim, norm, out); + } +} + +// aten::fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fft_hfft2_symint_out(at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2_out::call(self, s, dim, norm, out); +} +namespace symint { + template >> + at::Tensor & fft_hfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt) { + return at::_ops::fft_hfft2_out::call(self, s, dim, norm, out); + } +} + +// aten::fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fft_hfft2_symint_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft2_out::call(self, s, dim, norm, out); +} +namespace symint { + template >> + at::Tensor & fft_hfft2_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft2_out::call(self, s, dim, norm, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2f20cf099f42f8ec66735edb418d3bff14415599 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2_compositeimplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor fft_hfft2(const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor fft_hfft2_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft2_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); +TORCH_API at::Tensor & fft_hfft2_symint_out(at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft2_symint_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ba37c5581f0f32db75e32a36f6b847ce184fec46 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft_compositeimplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor fft_hfft(const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor fft_hfft_symint(const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft_outf(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); +TORCH_API at::Tensor & fft_hfft_symint_out(at::Tensor & out, const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft_symint_outf(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfftn_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfftn_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3d906c1c182582e983f4d782966f6ab353e28ad7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfftn_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_hfftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::OptionalIntArrayRef dim=::std::nullopt, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfftn_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, ::std::optional norm, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifft2_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifft2_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3e0f18274f94e7c5d2c8830626f51abb7c8e6b1d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifft2_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fft_ifft2 { + using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_ifft2"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fft_ifft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm); +}; + +struct TORCH_API fft_ifft2_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalSymIntArrayRef, at::IntArrayRef, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_ifft2"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "fft_ifft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift.h new file mode 100644 index 0000000000000000000000000000000000000000..95e060c1912a0a52cc2dd1339d2cecf86144bdf2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fft_ifftshift(Tensor self, int[1]? dim=None) -> Tensor +inline at::Tensor fft_ifftshift(const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt) { + return at::_ops::fft_ifftshift::call(self, dim); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8b01459753d7e46527d9b4f1766dbba312de2ccb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fft_ifftshift { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_ifftshift"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fft_ifftshift(Tensor self, int[1]? dim=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::OptionalIntArrayRef dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalIntArrayRef dim); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ihfft_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ihfft_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..36f1361d5a8fbe3703f0d7d653bb9506e26e0242 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ihfft_compositeimplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor fft_ihfft(const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor fft_ihfft_symint(const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_ihfft_out(at::Tensor & out, const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_ihfft_outf(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); +TORCH_API at::Tensor & fft_ihfft_symint_out(at::Tensor & out, const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_ihfft_symint_outf(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_irfft_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_irfft_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..feedce91adbed04b597763ac789330ab9f60b516 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_irfft_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fft_irfft { + using schema = at::Tensor (const at::Tensor &, ::std::optional, int64_t, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_irfft"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fft_irfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); +}; + +struct TORCH_API fft_irfft_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, int64_t, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_irfft"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "fft_irfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_rfft_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_rfft_native.h new file mode 100644 index 0000000000000000000000000000000000000000..04770a370b486d9378b3bf92b597e145063c4411 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fft_rfft_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_rfft_symint(const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_rfft_symint_out(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/flatten_dense_tensors_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/flatten_dense_tensors_native.h new file mode 100644 index 0000000000000000000000000000000000000000..4edb5a6df4da0a0a31532d56587e96a58888dd7c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/flatten_dense_tensors_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor flatten_dense_tensors(at::TensorList tensors); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/flip_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/flip_native.h new file mode 100644 index 0000000000000000000000000000000000000000..060abf43906bae5741f7183fbe4772a36868bc33 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/flip_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & flip_out(const at::Tensor & self, at::IntArrayRef dims, at::Tensor & out); +TORCH_API at::Tensor flip(const at::Tensor & self, at::IntArrayRef dims); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr.h new file mode 100644 index 0000000000000000000000000000000000000000..bf3420d44a49a1fd5601ef3a2f8498a637e3f3cc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fliplr(Tensor self) -> Tensor +inline at::Tensor fliplr(const at::Tensor & self) { + return at::_ops::fliplr::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b94a6bcda4cc7652046e308200d967cc3ab30b18 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor fliplr(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..45204d00306faeec727197d013ef76ccdb4db293 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Tensor & exponent, at::Tensor & out); +TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Tensor & exponent); +TORCH_API at::Tensor float_power(const at::Scalar & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_outf(const at::Scalar & self, const at::Tensor & exponent, at::Tensor & out); +TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Scalar & exponent); +TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & exponent); +TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Scalar & exponent, at::Tensor & out); +TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Scalar & exponent); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/floor_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/floor_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f6321a4b0c92312dc3379e20387b6a40707e0a00 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/floor_native.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_floor_out : public at::meta::structured_floor { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +TORCH_API at::Tensor floor_sparse(const at::Tensor & self); +TORCH_API at::Tensor & floor_sparse_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & floor_sparse_(at::Tensor & self); +TORCH_API at::Tensor floor_sparse_csr(const at::Tensor & self); +TORCH_API at::Tensor & floor_sparse_csr_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & floor_sparse_csr_(at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fmin.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fmin.h new file mode 100644 index 0000000000000000000000000000000000000000..3e68f0f179dede53b9892f6d1d3267dae056f8fd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fmin.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fmin(Tensor self, Tensor other) -> Tensor +inline at::Tensor fmin(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::fmin::call(self, other); +} + +// aten::fmin.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fmin_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::fmin_out::call(self, other, out); +} +// aten::fmin.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fmin_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::fmin_out::call(self, other, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fmin_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fmin_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..5cccb965fed3dcff4075376b93069a57217c470a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fmin_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor fmin(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & fmin_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & fmin_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..4def4dc1523f1c064cfc32fd2858d29a7620b2e3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..06a961b2b0e36f42eeaad6f2e2ced82fcfa142f3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8b535b7ccfe23f00fa4e45d67b5d950d7609ecad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_fractional_max_pool2d_backward_cpu : public at::meta::structured_fractional_max_pool2d_backward { +void impl(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, const at::Tensor & grad_input); +}; +struct TORCH_API structured_fractional_max_pool2d_backward_cuda : public at::meta::structured_fractional_max_pool2d_backward { +void impl(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, const at::Tensor & grad_input); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool3d_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool3d_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..5a99191a799e689339d55c8ee29cc3b3cb3390be --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool3d_backward.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & fractional_max_pool3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) { + return at::_ops::fractional_max_pool3d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input); +} +// aten::fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & fractional_max_pool3d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input) { + return at::_ops::fractional_max_pool3d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input); +} + +// aten::fractional_max_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices) -> Tensor +inline at::Tensor fractional_max_pool3d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) { + return at::_ops::fractional_max_pool3d_backward::call(grad_output, self, kernel_size, output_size, indices); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/frexp_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/frexp_native.h new file mode 100644 index 0000000000000000000000000000000000000000..120d97ba54d7f5ceec6904588c956424ca8ee335 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/frexp_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple frexp(const at::Tensor & self); +TORCH_API ::std::tuple frexp_out(const at::Tensor & self, at::Tensor & mantissa, at::Tensor & exponent); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/from_file_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/from_file_native.h new file mode 100644 index 0000000000000000000000000000000000000000..42dc462c099ae6ae4250627b45dd89bc92bfd8cb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/from_file_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & from_file_out(c10::string_view filename, ::std::optional shared, ::std::optional size, at::Tensor & out); +TORCH_API at::Tensor from_file(c10::string_view filename, ::std::optional shared=::std::nullopt, ::std::optional size=0, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fused_moving_avg_obs_fake_quant_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fused_moving_avg_obs_fake_quant_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..651d919dd5b74c93491c9bba8ab4fce157ee7e8d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/fused_moving_avg_obs_fake_quant_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fused_moving_avg_obs_fake_quant { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, double, int64_t, int64_t, int64_t, bool, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fused_moving_avg_obs_fake_quant"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fused_moving_avg_obs_fake_quant(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & observer_on, const at::Tensor & fake_quant_on, at::Tensor & running_min, at::Tensor & running_max, at::Tensor & scale, at::Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & observer_on, const at::Tensor & fake_quant_on, at::Tensor & running_min, at::Tensor & running_max, at::Tensor & scale, at::Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gcd_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gcd_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e77c8557e69a81ec8e3785a22b37ae09e32f4d34 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gcd_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API gcd_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::gcd"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "gcd.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +struct TORCH_API gcd { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::gcd"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "gcd(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API gcd_ { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::gcd_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "gcd_(Tensor(a!) self, Tensor other) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Tensor & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & other); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ge_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ge_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d55c748171314102282ebfea25c3e721749a6f1c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ge_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor ge(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor ge(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Tensor & other); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gelu_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gelu_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a51b938b053c02fc05eb1eca1479c7167dd98c33 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gelu_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor gelu(const at::Tensor & self, c10::string_view approximate="none"); +TORCH_API at::Tensor & gelu_(at::Tensor & self, c10::string_view approximate="none"); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gelu_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gelu_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..41df42c6638153c20727e45595bb39d74a99e3f3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/gelu_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_gelu : public TensorIteratorBase { + + + void meta(const at::Tensor & self, c10::string_view approximate); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/geometric_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/geometric_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c917b0da093e3d77a3d0f42ab7905724fe593e6e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/geometric_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & geometric_(at::Tensor & self, double p, ::std::optional generator=::std::nullopt); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/geqrf_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/geqrf_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..df82e0900620228dfe7b8a0f54e4d42c379b3b33 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/geqrf_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple geqrf(const at::Tensor & self); +TORCH_API ::std::tuple geqrf_out(at::Tensor & a, at::Tensor & tau, const at::Tensor & self); +TORCH_API ::std::tuple geqrf_outf(const at::Tensor & self, at::Tensor & a, at::Tensor & tau); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8479f9f11a18bf01f994889c382dff35b1af7e9e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor glu_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim); +TORCH_API at::Tensor & glu_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, int64_t dim); +TORCH_API at::Tensor & glu_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_jvp_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_jvp_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d1798fe8eeba666c48265b639dee858e34ce3804 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_jvp_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor glu_backward_jvp(const at::Tensor & grad_x, const at::Tensor & grad_glu, const at::Tensor & x, const at::Tensor & dgrad_glu, const at::Tensor & dx, int64_t dim); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_jvp_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_jvp_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..62b26abe272cabf1f49a56023c8e388eee6545d7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/glu_jvp_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor glu_jvp(const at::Tensor & glu, const at::Tensor & x, const at::Tensor & dx, int64_t dim); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_2d_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_2d_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c9490c3e2b64d77be5f7bdf31073864716870299 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_2d_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor grid_sampler_2d(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..af002d20ecdad821450f1f9d0bd5761440696588 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & grid_sampler_3d_out(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, at::Tensor & out); +TORCH_API at::Tensor grid_sampler_3d_cpu(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +TORCH_API at::Tensor grid_sampler_3d_cuda(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..addcbe7a37767b2086812a9e6ebc920e207e5f15 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API grid_sampler_3d { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, int64_t, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::grid_sampler_3d"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "grid_sampler_3d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +}; + +struct TORCH_API grid_sampler_3d_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, int64_t, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::grid_sampler_3d"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "grid_sampler_3d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..31719de4db1ddb7553889fb28f4f723aa732023e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API grid_sampler { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, int64_t, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::grid_sampler"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "grid_sampler(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/group_norm.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/group_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..63932a855ec5f456a085bd1f7b70ee4a04d18232 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/group_norm.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor +inline at::Tensor group_norm(const at::Tensor & input, int64_t num_groups, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enabled=true) { + return at::_ops::group_norm::call(input, num_groups, weight, bias, eps, cudnn_enabled); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hann_window.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hann_window.h new file mode 100644 index 0000000000000000000000000000000000000000..d36dc25f3797c7312861e189b6ccd0888b863a4d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hann_window.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, at::TensorOptions options={}) { + return at::_ops::hann_window::call(window_length, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::hann_window::call(window_length, dtype, layout, device, pin_memory); +} + +// aten::hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, bool periodic, at::TensorOptions options={}) { + return at::_ops::hann_window_periodic::call(window_length, periodic, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::hann_window_periodic::call(window_length, periodic, dtype, layout, device, pin_memory); +} + +// aten::hann_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_out(at::Tensor & out, int64_t window_length) { + return at::_ops::hann_window_out::call(window_length, out); +} +// aten::hann_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_outf(int64_t window_length, at::Tensor & out) { + return at::_ops::hann_window_out::call(window_length, out); +} + +// aten::hann_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_out(at::Tensor & out, int64_t window_length, bool periodic) { + return at::_ops::hann_window_periodic_out::call(window_length, periodic, out); +} +// aten::hann_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_outf(int64_t window_length, bool periodic, at::Tensor & out) { + return at::_ops::hann_window_periodic_out::call(window_length, periodic, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardshrink_backward_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardshrink_backward_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cfd9db393e95db6d2ce676057fb70e41ef62b31c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardshrink_backward_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor hardshrink_backward(const at::Tensor & grad_out, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & hardshrink_backward_out(at::Tensor & grad_input, const at::Tensor & grad_out, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & hardshrink_backward_outf(const at::Tensor & grad_out, const at::Tensor & self, const at::Scalar & lambd, at::Tensor & grad_input); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardswish_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardswish_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fe4ce92382fd253ec12592c9882f1fb31729d939 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardswish_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & hardswish_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..9f2c65dc635792cd263f634d09996d8a4123e0dd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_backward_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor hardtanh_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val); +TORCH_API at::Tensor & hardtanh_backward_out(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & grad_input); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1ce85b5140c3637a1df3596eef21e2ba361f55e5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & hardtanh_(at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_native.h new file mode 100644 index 0000000000000000000000000000000000000000..693598cca66a3e20f27f08c6514e68f4b76b96f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor hardtanh(const at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +TORCH_API at::Tensor & hardtanh_out(const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & out); +TORCH_API at::Tensor & hardtanh_(at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +TORCH_API at::Tensor hardtanh_quantized_cpu(const at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +TORCH_API at::Tensor & hardtanh_out_quantized_cpu(const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & out); +TORCH_API at::Tensor & hardtanh_quantized_cpu_(at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e317ef56d6fec95e64aa365026d57d992e5874f0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API hardtanh_out { + using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::hardtanh"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & out); +}; + +struct TORCH_API hardtanh { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::hardtanh"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val); +}; + +struct TORCH_API hardtanh_ { + using schema = at::Tensor & (at::Tensor &, const at::Scalar &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::hardtanh_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hash_tensor_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hash_tensor_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ad770ae3a3a7bbb4f9e9bef2431660db3a9ca789 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hash_tensor_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API hash_tensor { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, bool, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::hash_tensor"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode); +}; + +struct TORCH_API hash_tensor_out { + using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, bool, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::hash_tensor"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "hash_tensor.out(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..338fc0aad25a4c41f04207397cb52340ff25cce5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor heaviside(const at::Tensor & self, const at::Tensor & values); +TORCH_API at::Tensor & heaviside_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & values); +TORCH_API at::Tensor & heaviside_outf(const at::Tensor & self, const at::Tensor & values, at::Tensor & out); +TORCH_API at::Tensor & heaviside_(at::Tensor & self, const at::Tensor & values); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_native.h new file mode 100644 index 0000000000000000000000000000000000000000..cae8e7f3c49d558f8c654a65e09b069e90826b82 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_heaviside_out : public at::meta::structured_heaviside { +void impl(const at::Tensor & self, const at::Tensor & values, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histc.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histc.h new file mode 100644 index 0000000000000000000000000000000000000000..d6415a64ba53ae5784d44a318e4fc8fd0db56212 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histc.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & histc_out(at::Tensor & out, const at::Tensor & self, int64_t bins=100, const at::Scalar & min=0, const at::Scalar & max=0) { + return at::_ops::histc_out::call(self, bins, min, max, out); +} +// aten::histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & histc_outf(const at::Tensor & self, int64_t bins, const at::Scalar & min, const at::Scalar & max, at::Tensor & out) { + return at::_ops::histc_out::call(self, bins, min, max, out); +} + +// aten::histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor +inline at::Tensor histc(const at::Tensor & self, int64_t bins=100, const at::Scalar & min=0, const at::Scalar & max=0) { + return at::_ops::histc::call(self, bins, min, max); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histogram_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histogram_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..38e84158e9193dfa0c512e3b20874007cc74cd28 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histogram_ops.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API histogram_bins_tensor_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const ::std::optional &, bool, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogram"; + static constexpr const char* overload_name = "bins_tensor_out"; + static constexpr const char* schema_str = "histogram.bins_tensor_out(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & bins, const ::std::optional & weight, bool density, at::Tensor & hist, at::Tensor & bin_edges); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & bins, const ::std::optional & weight, bool density, at::Tensor & hist, at::Tensor & bin_edges); +}; + +struct TORCH_API histogram_bins_tensor { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogram"; + static constexpr const char* overload_name = "bins_tensor"; + static constexpr const char* schema_str = "histogram.bins_tensor(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & bins, const ::std::optional & weight, bool density); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & bins, const ::std::optional & weight, bool density); +}; + +struct TORCH_API histogram_bin_ct_out { + using schema = ::std::tuple (const at::Tensor &, int64_t, ::std::optional>, const ::std::optional &, bool, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogram"; + static constexpr const char* overload_name = "bin_ct_out"; + static constexpr const char* schema_str = "histogram.bin_ct_out(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)"; + static ::std::tuple call(const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density, at::Tensor & hist, at::Tensor & bin_edges); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density, at::Tensor & hist, at::Tensor & bin_edges); +}; + +struct TORCH_API histogram_bin_ct { + using schema = ::std::tuple (const at::Tensor &, int64_t, ::std::optional>, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogram"; + static constexpr const char* overload_name = "bin_ct"; + static constexpr const char* schema_str = "histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)"; + static ::std::tuple call(const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histogramdd_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histogramdd_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..35a5ce667c80a4770478fafedbdaa506f7f8eac2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/histogramdd_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API histogramdd { + using schema = ::std::tuple> (const at::Tensor &, at::IntArrayRef, ::std::optional>, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogramdd"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)"; + static ::std::tuple> call(const at::Tensor & self, at::IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density); + static ::std::tuple> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density); +}; + +struct TORCH_API histogramdd_int_bins { + using schema = ::std::tuple> (const at::Tensor &, int64_t, ::std::optional>, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogramdd"; + static constexpr const char* overload_name = "int_bins"; + static constexpr const char* schema_str = "histogramdd.int_bins(Tensor self, int bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)"; + static ::std::tuple> call(const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); + static ::std::tuple> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); +}; + +struct TORCH_API histogramdd_TensorList_bins { + using schema = ::std::tuple> (const at::Tensor &, at::TensorList, ::std::optional>, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogramdd"; + static constexpr const char* overload_name = "TensorList_bins"; + static constexpr const char* schema_str = "histogramdd.TensorList_bins(Tensor self, Tensor[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)"; + static ::std::tuple> call(const at::Tensor & self, at::TensorList bins, ::std::optional> range, const ::std::optional & weight, bool density); + static ::std::tuple> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::TensorList bins, ::std::optional> range, const ::std::optional & weight, bool density); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hspmm.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hspmm.h new file mode 100644 index 0000000000000000000000000000000000000000..7925b53bd02e717d987073c6cd77b7433fc428a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hspmm.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hspmm_out(at::Tensor & out, const at::Tensor & mat1, const at::Tensor & mat2) { + return at::_ops::hspmm_out::call(mat1, mat2, out); +} +// aten::hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hspmm_outf(const at::Tensor & mat1, const at::Tensor & mat2, at::Tensor & out) { + return at::_ops::hspmm_out::call(mat1, mat2, out); +} + +// aten::hspmm(Tensor mat1, Tensor mat2) -> Tensor +inline at::Tensor hspmm(const at::Tensor & mat1, const at::Tensor & mat2) { + return at::_ops::hspmm::call(mat1, mat2); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hstack_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hstack_native.h new file mode 100644 index 0000000000000000000000000000000000000000..06d3e29f42375b242c00ba0e5e277043c69ed211 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hstack_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor hstack(at::TensorList tensors); +TORCH_API at::Tensor & hstack_out(at::TensorList tensors, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/huber_loss_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/huber_loss_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..20d2dbf585924d4479877e66895aa73bca6007b0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/huber_loss_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor huber_loss(const at::Tensor & self, const at::Tensor & target, int64_t reduction=at::Reduction::Mean, double delta=1.0); +TORCH_API at::Tensor & huber_loss_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & target, int64_t reduction=at::Reduction::Mean, double delta=1.0); +TORCH_API at::Tensor & huber_loss_outf(const at::Tensor & self, const at::Tensor & target, int64_t reduction, double delta, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hypot_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hypot_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..74c8b0be77d519752e890de2cf6615c7b9f35576 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/hypot_cuda_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor hypot(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & hypot_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & hypot_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & hypot_(at::Tensor & self, const at::Tensor & other); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/igamma_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/igamma_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e96bd1f435a5b98dedf8d1fe55ee29b620480a3c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/igamma_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_igamma_out : public at::meta::structured_igamma { +void impl(const at::Tensor & self, const at::Tensor & other, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/igammac_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/igammac_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ce520b120523704bca5e294be7081d4639e79b81 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/igammac_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor igammac(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & igammac_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & igammac_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & igammac_(at::Tensor & self, const at::Tensor & other); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3ccaedf30dc6cc4ed59a63c003dd5f3ede7905f8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & value); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1a94ec88d508f8572f00d492b1356dc66c4ee3f8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_native.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor index_fill(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor & index_fill_int_Scalar_out(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value, at::Tensor & out); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & value); +TORCH_API at::Tensor & index_fill_int_Tensor_out(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & value, at::Tensor & out); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & value); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & value); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..17e9c77ec5ed74a4af8186b5534939d43ac966dc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor index_put(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false); +TORCH_API at::Tensor & index_put_out(at::Tensor & out, const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false); +TORCH_API at::Tensor & index_put_outf(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate, at::Tensor & out); +TORCH_API at::Tensor & index_put_(at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d07305d002af2c27e41a70427850e2361060b576 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API index_put_ { + using schema = at::Tensor & (at::Tensor &, const c10::List<::std::optional> &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::index_put_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate); +}; + +struct TORCH_API index_put { + using schema = at::Tensor (const at::Tensor &, const c10::List<::std::optional> &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::index_put"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate); +}; + +struct TORCH_API index_put_out { + using schema = at::Tensor & (const at::Tensor &, const c10::List<::std::optional> &, const at::Tensor &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::index_put"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "index_put.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_reduce_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_reduce_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..069c142d51c2823842c83b2c4c529721f62ecd9a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_reduce_cuda_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor index_reduce(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true); +TORCH_API at::Tensor & index_reduce_out(at::Tensor & out, const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true); +TORCH_API at::Tensor & index_reduce_outf(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self, at::Tensor & out); +TORCH_API at::Tensor & index_reduce_(at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_select.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_select.h new file mode 100644 index 0000000000000000000000000000000000000000..cb04ff4c5daa6e2f05fa2c270668bf4e012091c2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_select.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::index_select.out(Tensor self, int dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & index_select_out(at::Tensor & out, const at::Tensor & self, int64_t dim, const at::Tensor & index) { + return at::_ops::index_select_out::call(self, dim, index, out); +} +// aten::index_select.out(Tensor self, int dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & index_select_outf(const at::Tensor & self, int64_t dim, const at::Tensor & index, at::Tensor & out) { + return at::_ops::index_select_out::call(self, dim, index, out); +} + +// aten::index_select(Tensor self, int dim, Tensor index) -> Tensor +inline at::Tensor index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index) { + return at::_ops::index_select::call(self, dim, index); +} + +// aten::index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & index_select_out(at::Tensor & out, const at::Tensor & self, at::Dimname dim, const at::Tensor & index) { + return at::_ops::index_select_dimname_out::call(self, dim, index, out); +} +// aten::index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & index_select_outf(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, at::Tensor & out) { + return at::_ops::index_select_dimname_out::call(self, dim, index, out); +} + +// aten::index_select.dimname(Tensor self, Dimname dim, Tensor index) -> Tensor +inline at::Tensor index_select(const at::Tensor & self, at::Dimname dim, const at::Tensor & index) { + return at::_ops::index_select_dimname::call(self, dim, index); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_select_backward_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_select_backward_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..766d4e11f2a8fc2688f27a8b272b6ebce61495fc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/index_select_backward_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor index_select_backward(const at::Tensor & grad, at::IntArrayRef self_sizes, int64_t dim, const at::Tensor & index); +TORCH_API at::Tensor index_select_backward_symint(const at::Tensor & grad, c10::SymIntArrayRef self_sizes, int64_t dim, const at::Tensor & index); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/indices_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/indices_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..501687a0a2820e224caa15d8acd770fe7e7fcee6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/indices_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API indices { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::indices"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "indices(Tensor(a) self) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/instance_norm_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/instance_norm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..daf64dd3e4f40bf57695ec5563c451cf105be18c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/instance_norm_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API instance_norm { + using schema = at::Tensor (const at::Tensor &, const ::std::optional &, const ::std::optional &, const ::std::optional &, const ::std::optional &, bool, double, double, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::instance_norm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool use_input_stats, double momentum, double eps, bool cudnn_enabled); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool use_input_stats, double momentum, double eps, bool cudnn_enabled); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/int_repr_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/int_repr_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6b3f9da6b48624e317d16c3d0ca9ddbedf3488c7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/int_repr_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & int_repr_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor int_repr_quantized_cpu(const at::Tensor & self); +TORCH_API at::Tensor int_repr_quantized_cuda(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/inverse_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/inverse_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9d55bc97c1b7811881ec1d6c5a4b4859602fac0f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/inverse_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API inverse { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::inverse"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "inverse(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API inverse_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::inverse"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_coalesced_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_coalesced_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c956cecf0f43e21669e288a3c31e631a6b458676 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_coalesced_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API is_coalesced { + using schema = bool (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::is_coalesced"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "is_coalesced(Tensor self) -> bool"; + static bool call(const at::Tensor & self); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..11893f51d46b9c84cf31d56f4d36a04235501e24 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API bool is_conj(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_set_to.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_set_to.h new file mode 100644 index 0000000000000000000000000000000000000000..5c38f0be266fa402cf430438c9a16927f08c26ce --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/is_set_to.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isnan.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isnan.h new file mode 100644 index 0000000000000000000000000000000000000000..585df45bd4f473bc66d0e4d770e4d14477059bb8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isnan.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::isnan(Tensor self) -> Tensor +inline at::Tensor isnan(const at::Tensor & self) { + return at::_ops::isnan::call(self); +} + +// aten::isnan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & isnan_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::isnan_out::call(self, out); +} +// aten::isnan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & isnan_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::isnan_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isnan_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isnan_native.h new file mode 100644 index 0000000000000000000000000000000000000000..cb0560e95459ff1c2872802e54316150ec9f0c42 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isnan_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & isnan_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor isnan(const at::Tensor & self); +TORCH_API at::Tensor NestedTensor_isnan(const at::Tensor & self); +TORCH_API at::Tensor isnan_sparse(const at::Tensor & self); +TORCH_API at::Tensor isnan_sparse_csr(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isneginf_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isneginf_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..adcb4df804874bb67c4e3472a74c19a50b5fe104 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isneginf_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor isneginf(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isneginf_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isneginf_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1fca2d43c46720c5e56ad6d78d591605a8e20f0c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/isneginf_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor isneginf(const at::Tensor & self); +TORCH_API at::Tensor & isneginf_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & isneginf_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/item_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/item_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3f3b6c9bf3672025d659fc572184e85cfbe73597 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/item_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Scalar item(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/kaiser_window.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/kaiser_window.h new file mode 100644 index 0000000000000000000000000000000000000000..e7c5a15839c67382a18dacc844d896e1d1459602 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/kaiser_window.h @@ -0,0 +1,85 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::kaiser_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, at::TensorOptions options={}) { + return at::_ops::kaiser_window::call(window_length, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::kaiser_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::kaiser_window::call(window_length, dtype, layout, device, pin_memory); +} + +// aten::kaiser_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, at::TensorOptions options={}) { + return at::_ops::kaiser_window_periodic::call(window_length, periodic, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::kaiser_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::kaiser_window_periodic::call(window_length, periodic, dtype, layout, device, pin_memory); +} + +// aten::kaiser_window.beta(int window_length, bool periodic, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, double beta, at::TensorOptions options={}) { + return at::_ops::kaiser_window_beta::call(window_length, periodic, beta, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::kaiser_window.beta(int window_length, bool periodic, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, double beta, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::kaiser_window_beta::call(window_length, periodic, beta, dtype, layout, device, pin_memory); +} + +// aten::kaiser_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_out(at::Tensor & out, int64_t window_length) { + return at::_ops::kaiser_window_out::call(window_length, out); +} +// aten::kaiser_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_outf(int64_t window_length, at::Tensor & out) { + return at::_ops::kaiser_window_out::call(window_length, out); +} + +// aten::kaiser_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_out(at::Tensor & out, int64_t window_length, bool periodic) { + return at::_ops::kaiser_window_periodic_out::call(window_length, periodic, out); +} +// aten::kaiser_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_outf(int64_t window_length, bool periodic, at::Tensor & out) { + return at::_ops::kaiser_window_periodic_out::call(window_length, periodic, out); +} + +// aten::kaiser_window.beta_out(int window_length, bool periodic, float beta, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_out(at::Tensor & out, int64_t window_length, bool periodic, double beta) { + return at::_ops::kaiser_window_beta_out::call(window_length, periodic, beta, out); +} +// aten::kaiser_window.beta_out(int window_length, bool periodic, float beta, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_outf(int64_t window_length, bool periodic, double beta, at::Tensor & out) { + return at::_ops::kaiser_window_beta_out::call(window_length, periodic, beta, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/l1_loss.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/l1_loss.h new file mode 100644 index 0000000000000000000000000000000000000000..bd1d7cf35f101ec8cf8852fcc9ca853841edfb3c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/l1_loss.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::l1_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor +inline at::Tensor l1_loss(const at::Tensor & self, const at::Tensor & target, int64_t reduction=at::Reduction::Mean) { + return at::_ops::l1_loss::call(self, target, reduction); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/l1_loss_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/l1_loss_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..67bce3415bcca7fea22e6d4d1c400c5b7fa535af --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/l1_loss_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API l1_loss { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::l1_loss"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "l1_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & target, int64_t reduction); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & target, int64_t reduction); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/layer_norm.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/layer_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..d2258334049f9cec8e43947c345106fcb9a58fef --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/layer_norm.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor +inline at::Tensor layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, c10::fromIntArrayRefSlow(normalized_shape), weight, bias, eps, cudnn_enable); +} +namespace symint { + template >> + at::Tensor layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, c10::fromIntArrayRefSlow(normalized_shape), weight, bias, eps, cudnn_enable); + } +} + +// aten::layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor +inline at::Tensor layer_norm_symint(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, normalized_shape, weight, bias, eps, cudnn_enable); +} +namespace symint { + template >> + at::Tensor layer_norm(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, normalized_shape, weight, bias, eps, cudnn_enable); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lcm_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lcm_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e5e9b8b995c6553442788d83e089e7e92e54dce1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lcm_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor lcm(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & lcm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & lcm_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & lcm_(at::Tensor & self, const at::Tensor & other); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ldexp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ldexp.h new file mode 100644 index 0000000000000000000000000000000000000000..468aeb29878f358da7b56ea8725cd67697ceb208 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ldexp.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::ldexp.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor ldexp(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::ldexp_Tensor::call(self, other); +} + +// aten::ldexp_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & ldexp_(at::Tensor & self, const at::Tensor & other) { + return at::_ops::ldexp_::call(self, other); +} + +// aten::ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & ldexp_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::ldexp_out::call(self, other, out); +} +// aten::ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & ldexp_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::ldexp_out::call(self, other, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3de2fae4937c5b1f5f2317323e3b439a570e001d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_backward_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_leaky_relu_backward_out : public at::meta::structured_leaky_relu_backward { +void impl(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & negative_slope, bool self_is_result, const at::Tensor & grad_input); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e84b68ba872bc1251751df0331b116710fe0448b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API leaky_relu_out { + using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::leaky_relu"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Scalar & negative_slope, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & negative_slope, at::Tensor & out); +}; + +struct TORCH_API leaky_relu { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::leaky_relu"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Scalar & negative_slope); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & negative_slope); +}; + +struct TORCH_API leaky_relu_ { + using schema = at::Tensor & (at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::leaky_relu_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Scalar & negative_slope); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Scalar & negative_slope); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lgamma_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lgamma_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7d6759a4a44833268a38b9ebbcd6ba5d738c8283 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lgamma_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor lgamma(const at::Tensor & self); +TORCH_API at::Tensor & lgamma_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & lgamma_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & lgamma_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_cholesky_ex_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_cholesky_ex_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..258e83e9280e0116f81d5d5071c784706c588331 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_cholesky_ex_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_linalg_cholesky_ex : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, bool upper, bool check_errors); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eig_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eig_native.h new file mode 100644 index 0000000000000000000000000000000000000000..5fc92e82739f2b9ded238821aace98f73e5b7aa0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eig_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple linalg_eig(const at::Tensor & self); +TORCH_API ::std::tuple linalg_eig_out(const at::Tensor & self, at::Tensor & eigenvalues, at::Tensor & eigenvectors); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvals_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvals_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e6f2d4310e59d4109ef03d84be607beaf0febbaa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvals_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor linalg_eigvals(const at::Tensor & self); +TORCH_API at::Tensor & linalg_eigvals_out(const at::Tensor & self, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvalsh_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvalsh_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..cab1626fab237400d62b71c60e02a9aa7f8fb451 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvalsh_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_eigvalsh { + using schema = at::Tensor (const at::Tensor &, c10::string_view); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_eigvalsh"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_eigvalsh(Tensor self, str UPLO=\"L\") -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::string_view UPLO); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view UPLO); +}; + +struct TORCH_API linalg_eigvalsh_out { + using schema = at::Tensor & (const at::Tensor &, c10::string_view, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_eigvalsh"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_eigvalsh.out(Tensor self, str UPLO=\"L\", *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::string_view UPLO, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view UPLO, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_inv_ex_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_inv_ex_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..ea51acdaf43a13e0a1dd339981721afbbfb64fa7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_inv_ex_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_linalg_inv_ex : public at::impl::MetaBase { + + + void meta(const at::Tensor & A, bool check_errors); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_ldl_solve_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_ldl_solve_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a30f2ee3ae110e9f659b0dd31552f37165f2390e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_ldl_solve_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor linalg_ldl_solve(const at::Tensor & LD, const at::Tensor & pivots, const at::Tensor & B, bool hermitian=false); +TORCH_API at::Tensor & linalg_ldl_solve_out(at::Tensor & out, const at::Tensor & LD, const at::Tensor & pivots, const at::Tensor & B, bool hermitian=false); +TORCH_API at::Tensor & linalg_ldl_solve_outf(const at::Tensor & LD, const at::Tensor & pivots, const at::Tensor & B, bool hermitian, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_ldl_solve_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_ldl_solve_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6992c2f64d5445c2586acc7e830c1b3aab312446 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_ldl_solve_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_linalg_ldl_solve_out : public at::meta::structured_linalg_ldl_solve { +void impl(const at::Tensor & LD, const at::Tensor & pivots, const at::Tensor & B, bool hermitian, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f6b57231823da5054c78a795676f4bad754aec76 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API ::std::tuple linalg_lu(const at::Tensor & A, bool pivot=true); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cecf9d68f06d407ac76d21da6b6f2a219b32e0e5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple linalg_lu(const at::Tensor & A, bool pivot=true); +TORCH_API ::std::tuple linalg_lu_out(at::Tensor & P, at::Tensor & L, at::Tensor & U, const at::Tensor & A, bool pivot=true); +TORCH_API ::std::tuple linalg_lu_outf(const at::Tensor & A, bool pivot, at::Tensor & P, at::Tensor & L, at::Tensor & U); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_factor_ex_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_factor_ex_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..5ddeb1c2786a9eca671e882978a624728a510a66 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_factor_ex_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API ::std::tuple linalg_lu_factor_ex(const at::Tensor & A, bool pivot=true, bool check_errors=false); +TORCH_API ::std::tuple linalg_lu_factor_ex_out(at::Tensor & LU, at::Tensor & pivots, at::Tensor & info, const at::Tensor & A, bool pivot=true, bool check_errors=false); +TORCH_API ::std::tuple linalg_lu_factor_ex_outf(const at::Tensor & A, bool pivot, bool check_errors, at::Tensor & LU, at::Tensor & pivots, at::Tensor & info); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matmul_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matmul_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..16fecdbbafad271b97f46ed8d8eca1dec61d069b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matmul_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_matmul { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matmul"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_matmul(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API linalg_matmul_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matmul"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_exp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_exp.h new file mode 100644 index 0000000000000000000000000000000000000000..7b4f28c116baf903ef3bec9a1fadde362912c0b5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_exp.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::linalg_matrix_exp(Tensor self) -> Tensor +inline at::Tensor linalg_matrix_exp(const at::Tensor & self) { + return at::_ops::linalg_matrix_exp::call(self); +} + +// aten::linalg_matrix_exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_matrix_exp_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::linalg_matrix_exp_out::call(self, out); +} +// aten::linalg_matrix_exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_matrix_exp_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::linalg_matrix_exp_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_power_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_power_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ebb83187fb06cd85e509b2b087d17756f502073d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_power_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_matrix_power { + using schema = at::Tensor (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_power"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_matrix_power(Tensor self, int n) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t n); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t n); +}; + +struct TORCH_API linalg_matrix_power_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_power"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t n, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_rank_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_rank_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..83a65f2cf6711b7fc75a8b93619e92acfc83b250 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_rank_ops.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_matrix_rank_atol_rtol_tensor { + using schema = at::Tensor (const at::Tensor &, const ::std::optional &, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_tensor"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_atol_rtol_tensor_out { + using schema = at::Tensor & (const at::Tensor &, const ::std::optional &, const ::std::optional &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_tensor_out"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_matrix_rank_atol_rtol_float { + using schema = at::Tensor (const at::Tensor &, ::std::optional, ::std::optional, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_float"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_atol_rtol_float_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, ::std::optional, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_float_out"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_matrix_rank { + using schema = at::Tensor (const at::Tensor &, double, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, double tol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double tol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_out { + using schema = at::Tensor & (const at::Tensor &, double, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, double tol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double tol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_matrix_rank_tol_tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "tol_tensor"; + static constexpr const char* schema_str = "linalg_matrix_rank.tol_tensor(Tensor input, Tensor tol, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & tol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & tol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_out_tol_tensor { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "out_tol_tensor"; + static constexpr const char* schema_str = "linalg_matrix_rank.out_tol_tensor(Tensor input, Tensor tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & input, const at::Tensor & tol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & tol, bool hermitian, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..83e522699842fedf212d11236f293d087469fa9e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor linalg_norm(const at::Tensor & self, const ::std::optional & ord=::std::nullopt, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & linalg_norm_out(const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); +TORCH_API at::Tensor linalg_norm(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & linalg_norm_out(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..2e110380679ebbd2b95ac665460fd472b18b4d66 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_ops.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_norm { + using schema = at::Tensor (const at::Tensor &, const ::std::optional &, at::OptionalIntArrayRef, bool, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +}; + +struct TORCH_API linalg_norm_ord_str { + using schema = at::Tensor (const at::Tensor &, c10::string_view, at::OptionalIntArrayRef, bool, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = "ord_str"; + static constexpr const char* schema_str = "linalg_norm.ord_str(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +}; + +struct TORCH_API linalg_norm_out { + using schema = at::Tensor & (const at::Tensor &, const ::std::optional &, at::OptionalIntArrayRef, bool, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); +}; + +struct TORCH_API linalg_norm_ord_str_out { + using schema = at::Tensor & (const at::Tensor &, c10::string_view, at::OptionalIntArrayRef, bool, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = "ord_str_out"; + static constexpr const char* schema_str = "linalg_norm.ord_str_out(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv.h new file mode 100644 index 0000000000000000000000000000000000000000..ed313e3dc976817a8a12457c89aff13d14035920 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv.h @@ -0,0 +1,87 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, const ::std::optional & atol={}, const ::std::optional & rtol={}, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_tensor::call(self, atol, rtol, hermitian); +} + +// aten::linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & atol={}, const ::std::optional & rtol={}, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_tensor_out::call(self, atol, rtol, hermitian, out); +} +// aten::linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_atol_rtol_tensor_out::call(self, atol, rtol, hermitian, out); +} + +// aten::linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_float::call(self, atol, rtol, hermitian); +} + +// aten::linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_float_out::call(self, atol, rtol, hermitian, out); +} +// aten::linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_atol_rtol_float_out::call(self, atol, rtol, hermitian, out); +} + +// aten::linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, double rcond, bool hermitian=false) { + return at::_ops::linalg_pinv::call(self, rcond, hermitian); +} + +// aten::linalg_pinv.rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, const at::Tensor & rcond, bool hermitian=false) { + return at::_ops::linalg_pinv_rcond_tensor::call(self, rcond, hermitian); +} + +// aten::linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, double rcond, bool hermitian=false) { + return at::_ops::linalg_pinv_out::call(self, rcond, hermitian, out); +} +// aten::linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, double rcond, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_out::call(self, rcond, hermitian, out); +} + +// aten::linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & rcond, bool hermitian=false) { + return at::_ops::linalg_pinv_out_rcond_tensor::call(self, rcond, hermitian, out); +} +// aten::linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, const at::Tensor & rcond, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_out_rcond_tensor::call(self, rcond, hermitian, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7b1c3ddb28fcf95569dee03f63a9fedef508bc02 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & atol={}, const ::std::optional & rtol={}, bool hermitian=false); +TORCH_API at::Tensor & linalg_pinv_outf(const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9683f698a5ff3a74544b12ecc99710898451fec1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_ops.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_pinv_atol_rtol_tensor { + using schema = at::Tensor (const at::Tensor &, const ::std::optional &, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = "atol_rtol_tensor"; + static constexpr const char* schema_str = "linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); +}; + +struct TORCH_API linalg_pinv_atol_rtol_tensor_out { + using schema = at::Tensor & (const at::Tensor &, const ::std::optional &, const ::std::optional &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = "atol_rtol_tensor_out"; + static constexpr const char* schema_str = "linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_pinv_atol_rtol_float { + using schema = at::Tensor (const at::Tensor &, ::std::optional, ::std::optional, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = "atol_rtol_float"; + static constexpr const char* schema_str = "linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); +}; + +struct TORCH_API linalg_pinv_atol_rtol_float_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, ::std::optional, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = "atol_rtol_float_out"; + static constexpr const char* schema_str = "linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_pinv { + using schema = at::Tensor (const at::Tensor &, double, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, double rcond, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double rcond, bool hermitian); +}; + +struct TORCH_API linalg_pinv_rcond_tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = "rcond_tensor"; + static constexpr const char* schema_str = "linalg_pinv.rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & rcond, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & rcond, bool hermitian); +}; + +struct TORCH_API linalg_pinv_out { + using schema = at::Tensor & (const at::Tensor &, double, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, double rcond, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double rcond, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_pinv_out_rcond_tensor { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_pinv"; + static constexpr const char* overload_name = "out_rcond_tensor"; + static constexpr const char* schema_str = "linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & rcond, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & rcond, bool hermitian, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bcb57ee939a5850939356c266e8e812f935201ba --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API ::std::tuple linalg_qr(const at::Tensor & A, c10::string_view mode="reduced"); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_slogdet.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_slogdet.h new file mode 100644 index 0000000000000000000000000000000000000000..053e2e788ca81a3efab0ae48ea187574f2596b10 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_slogdet.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet) +inline ::std::tuple linalg_slogdet(const at::Tensor & A) { + return at::_ops::linalg_slogdet::call(A); +} + +// aten::linalg_slogdet.out(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet) +inline ::std::tuple linalg_slogdet_out(at::Tensor & sign, at::Tensor & logabsdet, const at::Tensor & A) { + return at::_ops::linalg_slogdet_out::call(A, sign, logabsdet); +} +// aten::linalg_slogdet.out(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet) +inline ::std::tuple linalg_slogdet_outf(const at::Tensor & A, at::Tensor & sign, at::Tensor & logabsdet) { + return at::_ops::linalg_slogdet_out::call(A, sign, logabsdet); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_ex.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_ex.h new file mode 100644 index 0000000000000000000000000000000000000000..7539f817ccbae78ac3c1736e1b9f52d98dedfaa1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_ex.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor info) +inline ::std::tuple linalg_solve_ex(const at::Tensor & A, const at::Tensor & B, bool left=true, bool check_errors=false) { + return at::_ops::linalg_solve_ex::call(A, B, left, check_errors); +} + +// aten::linalg_solve_ex.out(Tensor A, Tensor B, *, bool left=True, bool check_errors=False, Tensor(a!) result, Tensor(b!) info) -> (Tensor(a!) result, Tensor(b!) info) +inline ::std::tuple linalg_solve_ex_out(at::Tensor & result, at::Tensor & info, const at::Tensor & A, const at::Tensor & B, bool left=true, bool check_errors=false) { + return at::_ops::linalg_solve_ex_out::call(A, B, left, check_errors, result, info); +} +// aten::linalg_solve_ex.out(Tensor A, Tensor B, *, bool left=True, bool check_errors=False, Tensor(a!) result, Tensor(b!) info) -> (Tensor(a!) result, Tensor(b!) info) +inline ::std::tuple linalg_solve_ex_outf(const at::Tensor & A, const at::Tensor & B, bool left, bool check_errors, at::Tensor & result, at::Tensor & info) { + return at::_ops::linalg_solve_ex_out::call(A, B, left, check_errors, result, info); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_ex_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_ex_native.h new file mode 100644 index 0000000000000000000000000000000000000000..959f28f081d57afb3dc59a01ad6c69ba915d4ca9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_ex_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple linalg_solve_ex(const at::Tensor & A, const at::Tensor & B, bool left=true, bool check_errors=false); +TORCH_API ::std::tuple linalg_solve_ex_out(const at::Tensor & A, const at::Tensor & B, bool left, bool check_errors, at::Tensor & result, at::Tensor & info); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_triangular_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_triangular_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b3c050c3c21fc331d37b46fedada2106a90ce20e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_triangular_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor linalg_solve_triangular(const at::Tensor & self, const at::Tensor & B, bool upper, bool left=true, bool unitriangular=false); +TORCH_API at::Tensor & linalg_solve_triangular_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & B, bool upper, bool left=true, bool unitriangular=false); +TORCH_API at::Tensor & linalg_solve_triangular_outf(const at::Tensor & self, const at::Tensor & B, bool upper, bool left, bool unitriangular, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_tensorsolve_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_tensorsolve_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c4e6af377fd3420c35e20eba34b5a1538c777c07 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_tensorsolve_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor linalg_tensorsolve(const at::Tensor & self, const at::Tensor & other, at::OptionalIntArrayRef dims=::std::nullopt); +TORCH_API at::Tensor & linalg_tensorsolve_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, at::OptionalIntArrayRef dims=::std::nullopt); +TORCH_API at::Tensor & linalg_tensorsolve_outf(const at::Tensor & self, const at::Tensor & other, at::OptionalIntArrayRef dims, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_vector_norm_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_vector_norm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6411403bfd31fab62d5a604158d113da1b054a6e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_vector_norm_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_linalg_vector_norm_out : public at::meta::structured_linalg_vector_norm { +void impl(const at::Tensor & self, const at::Scalar & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log.h new file mode 100644 index 0000000000000000000000000000000000000000..5f4050ec1d69a08b46a81da02191316db7abfd3f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::log(Tensor self) -> Tensor +inline at::Tensor log(const at::Tensor & self) { + return at::_ops::log::call(self); +} + +// aten::log_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & log_(at::Tensor & self) { + return at::_ops::log_::call(self); +} + +// aten::log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::log_out::call(self, out); +} +// aten::log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::log_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log10_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log10_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f34b2956d8d622e63ef50cc060aca47e05fb7949 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log10_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor log10(const at::Tensor & self); +TORCH_API at::Tensor & log10_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & log10_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & log10_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log10_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log10_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..08c9c94f61bce5fc3baadda80c78a2deadaff585 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log10_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor log10(const at::Tensor & self); +TORCH_API at::Tensor & log10_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & log10_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & log10_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log1p_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log1p_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7d591023efad4bbb96df6f2762eaa3fe9315acad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log1p_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor log1p(const at::Tensor & self); +TORCH_API at::Tensor & log1p_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & log1p_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & log1p_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log2.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log2.h new file mode 100644 index 0000000000000000000000000000000000000000..ce087a7bb3c0f7d65707597594adfbc139b40d6c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log2.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::log2(Tensor self) -> Tensor +inline at::Tensor log2(const at::Tensor & self) { + return at::_ops::log2::call(self); +} + +// aten::log2_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & log2_(at::Tensor & self) { + return at::_ops::log2_::call(self); +} + +// aten::log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log2_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::log2_out::call(self, out); +} +// aten::log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log2_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::log2_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_normal.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_normal.h new file mode 100644 index 0000000000000000000000000000000000000000..513e17d4489cbf589eb2dec2659fed066bdd787b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_normal.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::log_normal.out(Tensor self, float mean=1, float std=2, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log_normal_out(at::Tensor & out, const at::Tensor & self, double mean=1, double std=2, ::std::optional generator=::std::nullopt) { + return at::_ops::log_normal_out::call(self, mean, std, generator, out); +} +// aten::log_normal.out(Tensor self, float mean=1, float std=2, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log_normal_outf(const at::Tensor & self, double mean, double std, ::std::optional generator, at::Tensor & out) { + return at::_ops::log_normal_out::call(self, mean, std, generator, out); +} + +// aten::log_normal(Tensor self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor +inline at::Tensor log_normal(const at::Tensor & self, double mean=1, double std=2, ::std::optional generator=::std::nullopt) { + return at::_ops::log_normal::call(self, mean, std, generator); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_normal_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_normal_native.h new file mode 100644 index 0000000000000000000000000000000000000000..844d7e30e8137d8533a64702805d23a9951cc992 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_normal_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor log_normal(const at::Tensor & self, double mean=1, double std=2, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & log_normal_out(const at::Tensor & self, double mean, double std, ::std::optional generator, at::Tensor & out); +TORCH_API at::Tensor & log_normal_(at::Tensor & self, double mean=1, double std=2, ::std::optional generator=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_sigmoid_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_sigmoid_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9eb14fd65ec71a818c6c9e8584a5cf4940b2b798 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_sigmoid_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API log_sigmoid_backward_grad_input { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::log_sigmoid_backward"; + static constexpr const char* overload_name = "grad_input"; + static constexpr const char* schema_str = "log_sigmoid_backward.grad_input(Tensor grad_output, Tensor self, Tensor buffer, *, Tensor(a!) grad_input) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer, at::Tensor & grad_input); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer, at::Tensor & grad_input); +}; + +struct TORCH_API log_sigmoid_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::log_sigmoid_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_softmax_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_softmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8fd4932c43a2334381351fbe566c28bfc25ec99c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/log_softmax_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor log_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & log_softmax_out(const at::Tensor & self, int64_t dim, ::std::optional dtype, at::Tensor & out); +TORCH_API at::Tensor log_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logcumsumexp_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logcumsumexp_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e54a7598265fa0b8601979f0f20f3342be0ff60d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logcumsumexp_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor logcumsumexp(const at::Tensor & self, int64_t dim); +TORCH_API at::Tensor & logcumsumexp_out(at::Tensor & out, const at::Tensor & self, int64_t dim); +TORCH_API at::Tensor & logcumsumexp_outf(const at::Tensor & self, int64_t dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_and_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_and_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9d20d2b1436863e67a82348083332673998935ee --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_and_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API logical_and { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::logical_and"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "logical_and(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API logical_and_ { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::logical_and_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Tensor & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API logical_and_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::logical_and"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_not_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_not_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..4517cb5e9968de0b07f8729d8d0e532fb626a1be --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_not_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API logical_not { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::logical_not"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "logical_not(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API logical_not_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::logical_not_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "logical_not_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API logical_not_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::logical_not"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_or.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_or.h new file mode 100644 index 0000000000000000000000000000000000000000..50f99ea191174ac015a95c31b925e128d745de0e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logical_or.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::logical_or(Tensor self, Tensor other) -> Tensor +inline at::Tensor logical_or(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::logical_or::call(self, other); +} + +// aten::logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & logical_or_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::logical_or_out::call(self, other, out); +} +// aten::logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & logical_or_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::logical_or_out::call(self, other, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logsumexp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logsumexp.h new file mode 100644 index 0000000000000000000000000000000000000000..499ebbba9d810cd81501dbc36880990cf4433c11 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/logsumexp.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor logsumexp(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false) { + return at::_ops::logsumexp::call(self, dim, keepdim); +} + +// aten::logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & logsumexp_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false) { + return at::_ops::logsumexp_out::call(self, dim, keepdim, out); +} +// aten::logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & logsumexp_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, at::Tensor & out) { + return at::_ops::logsumexp_out::call(self, dim, keepdim, out); +} + +// aten::logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor logsumexp(const at::Tensor & self, at::DimnameList dim, bool keepdim=false) { + return at::_ops::logsumexp_names::call(self, dim, keepdim); +} + +// aten::logsumexp.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & logsumexp_out(at::Tensor & out, const at::Tensor & self, at::DimnameList dim, bool keepdim=false) { + return at::_ops::logsumexp_names_out::call(self, dim, keepdim, out); +} +// aten::logsumexp.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & logsumexp_outf(const at::Tensor & self, at::DimnameList dim, bool keepdim, at::Tensor & out) { + return at::_ops::logsumexp_names_out::call(self, dim, keepdim, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_cell_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_cell_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..45481d76a8a8e61bb3c261cd778f2f4ced6ea315 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_cell_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API lstm_cell { + using schema = ::std::tuple (const at::Tensor &, at::TensorList, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::lstm_cell"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, at::TensorList hx, const at::Tensor & w_ih, const at::Tensor & w_hh, const ::std::optional & b_ih, const ::std::optional & b_hh); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::TensorList hx, const at::Tensor & w_ih, const at::Tensor & w_hh, const ::std::optional & b_ih, const ::std::optional & b_hh); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_mps_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_mps_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..cfc3099f87af6e3c49b06d9c012ebb3cb5b58b6e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_mps_backward.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::lstm_mps_backward(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[]) +inline ::std::tuple,::std::vector> lstm_mps_backward(const ::std::optional & grad_y, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & z_state, const at::Tensor & cell_state_fwd, const at::Tensor & input, const at::Tensor & layersOutputs, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first) { + return at::_ops::lstm_mps_backward::call(grad_y, grad_hy, grad_cy, z_state, cell_state_fwd, input, layersOutputs, hx, params, has_biases, num_layers, dropout, train, bidirectional, batch_first); +} + +// aten::lstm_mps_backward.out(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, Tensor(a!) out0, Tensor(b!)[] out1, Tensor(c!)[] out2) -> () +inline void lstm_mps_backward_out(at::Tensor & out0, at::TensorList out1, at::TensorList out2, const ::std::optional & grad_y, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & z_state, const at::Tensor & cell_state_fwd, const at::Tensor & input, const at::Tensor & layersOutputs, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first) { + return at::_ops::lstm_mps_backward_out::call(grad_y, grad_hy, grad_cy, z_state, cell_state_fwd, input, layersOutputs, hx, params, has_biases, num_layers, dropout, train, bidirectional, batch_first, out0, out1, out2); +} +// aten::lstm_mps_backward.out(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, Tensor(a!) out0, Tensor(b!)[] out1, Tensor(c!)[] out2) -> () +inline void lstm_mps_backward_outf(const ::std::optional & grad_y, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & z_state, const at::Tensor & cell_state_fwd, const at::Tensor & input, const at::Tensor & layersOutputs, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, at::Tensor & out0, at::TensorList out1, at::TensorList out2) { + return at::_ops::lstm_mps_backward_out::call(grad_y, grad_hy, grad_cy, z_state, cell_state_fwd, input, layersOutputs, hx, params, has_biases, num_layers, dropout, train, bidirectional, batch_first, out0, out1, out2); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_mps_backward_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_mps_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..adc449dc34ca50b696bef07c830fae92aceead68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lstm_mps_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API void lstm_mps_backward_out(at::Tensor & out0, at::TensorList out1, at::TensorList out2, const ::std::optional & grad_y, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & z_state, const at::Tensor & cell_state_fwd, const at::Tensor & input, const at::Tensor & layersOutputs, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); +TORCH_API void lstm_mps_backward_outf(const ::std::optional & grad_y, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & z_state, const at::Tensor & cell_state_fwd, const at::Tensor & input, const at::Tensor & layersOutputs, at::TensorList hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, at::Tensor & out0, at::TensorList out1, at::TensorList out2); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lu_unpack_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lu_unpack_native.h new file mode 100644 index 0000000000000000000000000000000000000000..772eb16ebcd44b969ff186e0c45891ba7cf9de16 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/lu_unpack_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_lu_unpack_out : public at::meta::structured_lu_unpack { +void impl(const at::Tensor & LU_data, const at::Tensor & LU_pivots, bool unpack_data, bool unpack_pivots, const at::Tensor & P, const at::Tensor & L, const at::Tensor & U); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/masked_scatter.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/masked_scatter.h new file mode 100644 index 0000000000000000000000000000000000000000..33fd4e2a2f266e2b9eaaee7856e1f742f4774d8b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/masked_scatter.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor +inline at::Tensor masked_scatter(const at::Tensor & self, const at::Tensor & mask, const at::Tensor & source) { + return at::_ops::masked_scatter::call(self, mask, source); +} + +// aten::masked_scatter.out(Tensor self, Tensor mask, Tensor source, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & masked_scatter_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mask, const at::Tensor & source) { + return at::_ops::masked_scatter_out::call(self, mask, source, out); +} +// aten::masked_scatter.out(Tensor self, Tensor mask, Tensor source, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & masked_scatter_outf(const at::Tensor & self, const at::Tensor & mask, const at::Tensor & source, at::Tensor & out) { + return at::_ops::masked_scatter_out::call(self, mask, source, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c65e783f68d5c327e11174eef3aefea9ab35d928 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor matrix_exp_backward(const at::Tensor & self, const at::Tensor & grad); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f0e8c5f4500d45f52c84e64a2b08b54c9aa5d361 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API matrix_exp { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::matrix_exp"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "matrix_exp(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool1d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool1d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..446aa8fc36ce88e297baa35807c8e2eabbb8fb5d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool1d_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor max_pool1d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..fe9c5790ee0d93f60810d13e08798ee95f75202e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API max_pool2d { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::max_pool2d"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..2007d02b87626fb28000d375fbe5e0a0008c2e7d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple max_pool2d_with_indices_out(at::Tensor & out, at::Tensor & indices, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) { + return at::_ops::max_pool2d_with_indices_out::call(self, kernel_size, stride, padding, dilation, ceil_mode, out, indices); +} +// aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple max_pool2d_with_indices_outf(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices) { + return at::_ops::max_pool2d_with_indices_out::call(self, kernel_size, stride, padding, dilation, ceil_mode, out, indices); +} + +// aten::max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) +inline ::std::tuple max_pool2d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) { + return at::_ops::max_pool2d_with_indices::call(self, kernel_size, stride, padding, dilation, ceil_mode); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_backward_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..4df2ef4710f438f9fe6b66cdceab1836c7dd87ed --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_max_pool2d_with_indices_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..bd09129fd641590db85a93cfa348869355cdad43 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_max_pool2d_with_indices : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_unpool3d_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_unpool3d_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..112e226860c90e17fd2fd339d2284634b9bdf091 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/max_unpool3d_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API max_unpool3d_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef, at::IntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::max_unpool3d"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & indices, c10::SymIntArrayRef output_size, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & indices, c10::SymIntArrayRef output_size, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & out); +}; + +struct TORCH_API max_unpool3d { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::max_unpool3d"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & indices, c10::SymIntArrayRef output_size, at::IntArrayRef stride, at::IntArrayRef padding); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & indices, c10::SymIntArrayRef output_size, at::IntArrayRef stride, at::IntArrayRef padding); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/maximum_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/maximum_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c18bdab1ca31dfa5ccc1f03a42206eadb2d78411 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/maximum_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor maximum(const at::Tensor & self, const at::Tensor & other); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/minimum_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/minimum_native.h new file mode 100644 index 0000000000000000000000000000000000000000..aafbe1e1135d6b343c3553aa1960fed7826c0e56 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/minimum_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_minimum_out : public at::meta::structured_minimum { +void impl(const at::Tensor & self, const at::Tensor & other, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..5424cceaab968f78601c9defb3e572bc7bf8bd7c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor) +inline ::std::tuple miopen_batch_norm_backward(const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon) { + return at::_ops::miopen_batch_norm_backward::call(input, grad_output, weight, running_mean, running_var, save_mean, save_var, epsilon); +} + +// aten::miopen_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple miopen_batch_norm_backward_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon) { + return at::_ops::miopen_batch_norm_backward_out::call(input, grad_output, weight, running_mean, running_var, save_mean, save_var, epsilon, out0, out1, out2); +} +// aten::miopen_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple miopen_batch_norm_backward_outf(const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::miopen_batch_norm_backward_out::call(input, grad_output, weight, running_mean, running_var, save_mean, save_var, epsilon, out0, out1, out2); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8faac16ef1c602c74a60c042ad23acccb0be48ac --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API miopen_batch_norm_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, const ::std::optional &, const ::std::optional &, double); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::miopen_batch_norm_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon); +}; + +struct TORCH_API miopen_batch_norm_backward_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, const ::std::optional &, const ::std::optional &, double, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::miopen_batch_norm_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "miopen_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1ab80e699f50470aba96f05e3c4de8489cad8871 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple miopen_batch_norm_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon); +TORCH_API ::std::tuple miopen_batch_norm_outf(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..46f03d7df24729fea38ce557cd443df02f92990e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple miopen_batch_norm(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_depthwise_convolution_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_depthwise_convolution_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..988a20d28abd03a4ac514374b5018e5ee490325d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_depthwise_convolution_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API miopen_depthwise_convolution { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, bool, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::miopen_depthwise_convolution"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); +}; + +struct TORCH_API miopen_depthwise_convolution_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, bool, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::miopen_depthwise_convolution"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "miopen_depthwise_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_rnn_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_rnn_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0667b8a763821ee8bb74f996b1161f4480c56b35 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_rnn_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple miopen_rnn(const at::Tensor & input, at::TensorList weight, int64_t weight_stride0, const at::Tensor & hx, const ::std::optional & cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, at::IntArrayRef batch_sizes, const ::std::optional & dropout_state); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mish.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mish.h new file mode 100644 index 0000000000000000000000000000000000000000..488e00aadbd9189eae833ebfddaaf41afbe39cee --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mish.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::mish(Tensor self) -> Tensor +inline at::Tensor mish(const at::Tensor & self) { + return at::_ops::mish::call(self); +} + +// aten::mish_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & mish_(at::Tensor & self) { + return at::_ops::mish_::call(self); +} + +// aten::mish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & mish_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::mish_out::call(self, out); +} +// aten::mish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & mish_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::mish_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mish_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mish_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8ea3ca88972b258358fc859c94d64edc6a7fc518 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mish_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor mish(const at::Tensor & self); +TORCH_API at::Tensor & mish_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & mish_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & mish_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_convolution_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_convolution_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..08d88d462c2dff5e042e3681fb82e01c89ec9a08 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_convolution_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor mkldnn_convolution(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups); +TORCH_API at::Tensor mkldnn_convolution_symint(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); +TORCH_API at::Tensor & mkldnn_convolution_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups); +TORCH_API at::Tensor & mkldnn_convolution_outf(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, at::Tensor & out); +TORCH_API at::Tensor & mkldnn_convolution_symint_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); +TORCH_API at::Tensor & mkldnn_convolution_symint_outf(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_backward_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..87a7ef2f54f5abf1d74b4d185c700d5a3ada5d59 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & mkldnn_max_pool2d_backward_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, const at::Tensor & input, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false); +TORCH_API at::Tensor & mkldnn_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & output, const at::Tensor & input, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e77e795a1ada06ed4b9a94d4d3904f18d2c8474c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & mkldnn_max_pool2d_out(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out); +TORCH_API at::Tensor mkldnn_max_pool2d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..79b58393ab7175c637ab3f22808e59d2edbd8c99 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API mkldnn_reorder_conv2d_weight { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, at::OptionalSymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mkldnn_reorder_conv2d_weight"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size); +}; + +struct TORCH_API mkldnn_reorder_conv2d_weight_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, at::OptionalSymIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mkldnn_reorder_conv2d_weight"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "mkldnn_reorder_conv2d_weight.out(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/moveaxis_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/moveaxis_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..39d5366ad61f8df25b505890d1added2d230a7cc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/moveaxis_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor moveaxis(const at::Tensor & self, at::IntArrayRef source, at::IntArrayRef destination); +TORCH_API at::Tensor moveaxis(const at::Tensor & self, int64_t source, int64_t destination); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/movedim.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/movedim.h new file mode 100644 index 0000000000000000000000000000000000000000..225df56de6d4072982924a2d92b4c3d851aa818e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/movedim.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a) +inline at::Tensor movedim(const at::Tensor & self, at::IntArrayRef source, at::IntArrayRef destination) { + return at::_ops::movedim_intlist::call(self, source, destination); +} + +// aten::movedim.int(Tensor(a) self, int source, int destination) -> Tensor(a) +inline at::Tensor movedim(const at::Tensor & self, int64_t source, int64_t destination) { + return at::_ops::movedim_int::call(self, source, destination); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d80d3117cfdaf23eac8c9a3c71303b3904914545 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple mps_convolution_backward_out_symint(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..73708f0367fd696ace228d684c46484965e4a881 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API mps_convolution_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, ::std::array); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mps_convolution_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); +}; + +struct TORCH_API mps_convolution_backward_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, ::std::array, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mps_convolution_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "mps_convolution_backward.out(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multi_margin_loss_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multi_margin_loss_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..869b70582b327e1fb25ce2fda473148b1e79ddc3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multi_margin_loss_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor multi_margin_loss(const at::Tensor & self, const at::Tensor & target, const at::Scalar & p=1, const at::Scalar & margin=1, const ::std::optional & weight={}, int64_t reduction=at::Reduction::Mean); +TORCH_API at::Tensor & multi_margin_loss_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & target, const at::Scalar & p=1, const at::Scalar & margin=1, const ::std::optional & weight={}, int64_t reduction=at::Reduction::Mean); +TORCH_API at::Tensor & multi_margin_loss_outf(const at::Tensor & self, const at::Tensor & target, const at::Scalar & p, const at::Scalar & margin, const ::std::optional & weight, int64_t reduction, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..4ff8510c59f20a52fb6f9629619dc41efe26d184 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple multilabel_margin_loss_forward_cpu(const at::Tensor & self, const at::Tensor & target, int64_t reduction); +TORCH_API ::std::tuple multilabel_margin_loss_forward_out_cpu(const at::Tensor & self, const at::Tensor & target, int64_t reduction, at::Tensor & output, at::Tensor & is_target); +TORCH_API ::std::tuple multilabel_margin_loss_forward_cuda(const at::Tensor & self, const at::Tensor & target, int64_t reduction); +TORCH_API ::std::tuple multilabel_margin_loss_forward_out_cuda(const at::Tensor & self, const at::Tensor & target, int64_t reduction, at::Tensor & output, at::Tensor & is_target); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f078659e6e14f28d7b9891c78d386465d8ce1dc3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API multilabel_margin_loss_forward_output { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::multilabel_margin_loss_forward"; + static constexpr const char* overload_name = "output"; + static constexpr const char* schema_str = "multilabel_margin_loss_forward.output(Tensor self, Tensor target, int reduction, *, Tensor(a!) output, Tensor(b!) is_target) -> (Tensor(a!), Tensor(b!))"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & target, int64_t reduction, at::Tensor & output, at::Tensor & is_target); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & target, int64_t reduction, at::Tensor & output, at::Tensor & is_target); +}; + +struct TORCH_API multilabel_margin_loss_forward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::multilabel_margin_loss_forward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "multilabel_margin_loss_forward(Tensor self, Tensor target, int reduction) -> (Tensor output, Tensor is_target)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & target, int64_t reduction); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & target, int64_t reduction); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multinomial_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multinomial_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cc96880eb1b33356f1981244400ac6867cf73371 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multinomial_cuda_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor multinomial(const at::Tensor & self, int64_t num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor multinomial_symint(const at::Tensor & self, c10::SymInt num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & multinomial_out(at::Tensor & out, const at::Tensor & self, int64_t num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & multinomial_outf(const at::Tensor & self, int64_t num_samples, bool replacement, ::std::optional generator, at::Tensor & out); +TORCH_API at::Tensor & multinomial_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymInt num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & multinomial_symint_outf(const at::Tensor & self, c10::SymInt num_samples, bool replacement, ::std::optional generator, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multiply_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multiply_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0087787309235f217b5388508e16678b339858a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/multiply_compositeimplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor multiply(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & multiply_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & multiply_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & multiply_(at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor multiply(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & multiply_(at::Tensor & self, const at::Scalar & other); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmean.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmean.h new file mode 100644 index 0000000000000000000000000000000000000000..696990710201725771fcf0d97eb02a3c3952b14c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmean.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::nanmean(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor nanmean(const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::nanmean::call(self, dim, keepdim, dtype); +} + +// aten::nanmean.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmean_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::nanmean_out::call(self, dim, keepdim, dtype, out); +} +// aten::nanmean.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmean_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out) { + return at::_ops::nanmean_out::call(self, dim, keepdim, dtype, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian.h new file mode 100644 index 0000000000000000000000000000000000000000..da9a08795a529fa4d4fded483e1f98c3f812aecc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian.h @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::nanmedian(Tensor self) -> Tensor +inline at::Tensor nanmedian(const at::Tensor & self) { + return at::_ops::nanmedian::call(self); +} + +// aten::nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple nanmedian(const at::Tensor & self, int64_t dim, bool keepdim=false) { + return at::_ops::nanmedian_dim::call(self, dim, keepdim); +} + +// aten::nanmedian.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) +inline ::std::tuple nanmedian_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t dim, bool keepdim=false) { + return at::_ops::nanmedian_dim_values::call(self, dim, keepdim, values, indices); +} +// aten::nanmedian.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) +inline ::std::tuple nanmedian_outf(const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & values, at::Tensor & indices) { + return at::_ops::nanmedian_dim_values::call(self, dim, keepdim, values, indices); +} + +// aten::nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple nanmedian(const at::Tensor & self, at::Dimname dim, bool keepdim=false) { + return at::_ops::nanmedian_names_dim::call(self, dim, keepdim); +} + +// aten::nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) +inline ::std::tuple nanmedian_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, at::Dimname dim, bool keepdim=false) { + return at::_ops::nanmedian_names_dim_values::call(self, dim, keepdim, values, indices); +} +// aten::nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) +inline ::std::tuple nanmedian_outf(const at::Tensor & self, at::Dimname dim, bool keepdim, at::Tensor & values, at::Tensor & indices) { + return at::_ops::nanmedian_names_dim_values::call(self, dim, keepdim, values, indices); +} + +// aten::nanmedian.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmedian_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::nanmedian_out::call(self, out); +} +// aten::nanmedian.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmedian_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::nanmedian_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fd99f8c69a1388383fde288d2b2f7a3b0339f2d6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor nanmedian(const at::Tensor & self); +TORCH_API ::std::tuple nanmedian_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t dim, bool keepdim=false); +TORCH_API ::std::tuple nanmedian_outf(const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & values, at::Tensor & indices); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nansum_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nansum_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..06b3d9cfc6257bb45d0bda88185bf62502eaf6e1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nansum_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor nansum(const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & nansum_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & nansum_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_channel_shuffle_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_channel_shuffle_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..359ce36242b862ba4283ed8be31fd95d4b86b70d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_channel_shuffle_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor native_channel_shuffle(const at::Tensor & self, int64_t groups); +TORCH_API at::Tensor native_channel_shuffle_symint(const at::Tensor & self, c10::SymInt groups); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_dropout_backward_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_dropout_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..30b16748bcd004c7b28be5e5b2c89c79e522c16d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_dropout_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & native_dropout_backward_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & mask, double scale); +TORCH_API at::Tensor & native_dropout_backward_outf(const at::Tensor & grad_output, const at::Tensor & mask, double scale, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_dropout_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_dropout_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9ad6ac6ac969513b1b0d0f15cc737e0b8cc17935 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_dropout_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API native_dropout_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, double); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::native_dropout_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & mask, double scale); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & mask, double scale); +}; + +struct TORCH_API native_dropout_backward_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, double, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::native_dropout_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "native_dropout_backward.out(Tensor grad_output, Tensor mask, float scale, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & mask, double scale, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & mask, double scale, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..3306f4fcb18319af80d878d0f21f33f7cec2cf48 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::native_group_norm(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps) -> (Tensor, Tensor, Tensor) +inline ::std::tuple native_group_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps) { + return at::_ops::native_group_norm::call(input, weight, bias, N, C, HxW, group, eps); +} +namespace symint { + template >> + ::std::tuple native_group_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps) { + return at::_ops::native_group_norm::call(input, weight, bias, N, C, HxW, group, eps); + } +} + +// aten::native_group_norm(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps) -> (Tensor, Tensor, Tensor) +inline ::std::tuple native_group_norm_symint(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps) { + return at::_ops::native_group_norm::call(input, weight, bias, N, C, HxW, group, eps); +} +namespace symint { + template >> + ::std::tuple native_group_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps) { + return at::_ops::native_group_norm::call(input, weight, bias, N, C, HxW, group, eps); + } +} + +// aten::native_group_norm.out(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple native_group_norm_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); +} +namespace symint { + template >> + ::std::tuple native_group_norm_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); + } +} + +// aten::native_group_norm.out(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple native_group_norm_outf(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); +} +namespace symint { + template >> + ::std::tuple native_group_norm_outf(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); + } +} + +// aten::native_group_norm.out(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple native_group_norm_symint_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); +} +namespace symint { + template >> + ::std::tuple native_group_norm_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); + } +} + +// aten::native_group_norm.out(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple native_group_norm_symint_outf(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); +} +namespace symint { + template >> + ::std::tuple native_group_norm_outf(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::native_group_norm_out::call(input, weight, bias, N, C, HxW, group, eps, out0, out1, out2); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_backward_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_backward_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9b0d6e7300cbaca54a4c784dd59a2381583e7e46 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_backward_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple native_group_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, int64_t N, int64_t C, int64_t HxW, int64_t group, ::std::array output_mask); +TORCH_API ::std::tuple native_group_norm_backward_symint(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, ::std::array output_mask); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e9da9a02e887a2cf80812c25a6fb23b94b369f53 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple native_group_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps); +TORCH_API ::std::tuple native_group_norm_symint(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps); +TORCH_API ::std::tuple native_group_norm_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps); +TORCH_API ::std::tuple native_group_norm_outf(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +TORCH_API ::std::tuple native_group_norm_symint_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps); +TORCH_API ::std::tuple native_group_norm_symint_outf(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_layer_norm_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_layer_norm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e7d0c1572a6f39661309aed0cf571872f9697e03 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/native_layer_norm_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple math_native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +TORCH_API ::std::tuple native_layer_norm_out_symint(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +TORCH_API ::std::tuple layer_norm_cpu(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +TORCH_API ::std::tuple layer_norm_cuda(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +TORCH_API ::std::tuple nested_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ne_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ne_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..23ea7f42b88b75e600b6e830cdbb06ef1654b722 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ne_cuda_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor ne(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ne_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ne_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & ne_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor ne(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ne_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ne_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & ne_(at::Tensor & self, const at::Tensor & other); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ne_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ne_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..93c787b29822009a0fb8c775f7c2b4620d928cc1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/ne_meta_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor ne(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ne_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ne_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & ne_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor ne(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ne_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ne_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & ne_(at::Tensor & self, const at::Tensor & other); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c5ecd461d4194e20404cfe83c9d7cf13655abb73 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor new_empty_strided(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options={}); +TORCH_API at::Tensor new_empty_strided(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor new_empty_strided_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options={}); +TORCH_API at::Tensor new_empty_strided_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_native.h new file mode 100644 index 0000000000000000000000000000000000000000..24d96ebe44389925d9ef6cb159b9b15bf20f9d28 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & new_empty_strided_out_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::Tensor & out); +TORCH_API at::Tensor new_empty_strided_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_ones_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_ones_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f04cee64bcac8b92440ddbb61c0537cc97927a73 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/new_ones_compositeexplicitautograd_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor new_ones(const at::Tensor & self, at::IntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor new_ones(const at::Tensor & self, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor new_ones_symint(const at::Tensor & self, c10::SymIntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor new_ones_symint(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor & new_ones_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size); +TORCH_API at::Tensor & new_ones_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor & new_ones_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size); +TORCH_API at::Tensor & new_ones_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nextafter_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nextafter_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3e8942097619bba20b528897c9722dda72182b55 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nextafter_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API nextafter_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nextafter"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "nextafter.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +struct TORCH_API nextafter { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nextafter"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "nextafter(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API nextafter_ { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nextafter_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "nextafter_(Tensor(a!) self, Tensor other) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Tensor & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & other); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss2d_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss2d_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b0044b6d98242965a16799b5606dbc00d26e1f35 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss2d_backward_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor nll_loss2d_backward_cpu(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor & nll_loss2d_backward_out_cpu(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight, at::Tensor & grad_input); +TORCH_API at::Tensor nll_loss2d_backward_cuda(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor & nll_loss2d_backward_out_cuda(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight, at::Tensor & grad_input); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..154db8e8a7e623ea13f8d02553bb0f3fd332fbe2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor nll_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor nll_loss_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const at::Tensor & total_weight); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ce47d80648c655ce9b02f554612e997e85b9711b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor nll_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor nll_loss_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor & nll_loss_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor & nll_loss_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight, at::Tensor & grad_input); +TORCH_API at::Tensor & nll_loss_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor & nll_loss_backward_symint_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const at::Tensor & total_weight, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_nd_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_nd_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a25350ed0d138698c750b289b37a6c4bf19f2508 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_nd_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor nll_loss_nd_symint(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight={}, int64_t reduction=at::Reduction::Mean, c10::SymInt ignore_index=-100); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nonzero_numpy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nonzero_numpy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..daa4908b74937beff0ac223d195230b6a5b6d282 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/nonzero_numpy_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API nonzero_numpy { + using schema = ::std::vector (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nonzero_numpy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "nonzero_numpy(Tensor self) -> Tensor[]"; + static ::std::vector call(const at::Tensor & self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/norm_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/norm_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..27b12adbdf1c4786d0e2d4fea656cd349ecfc6b4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/norm_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor norm(const at::Tensor & self, const ::std::optional & p, at::IntArrayRef dim, bool keepdim, at::ScalarType dtype); +TORCH_API at::Tensor norm(const at::Tensor & self, const ::std::optional & p, at::IntArrayRef dim, bool keepdim=false); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/numpy_T_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/numpy_T_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1292b551374bf4bfe42ea644f5af95ca7ce8c7c3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/numpy_T_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor numpy_T(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/output_nr_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/output_nr_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..82b619de6f6e125b6cd08c9725b808ab018a4b24 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/output_nr_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API int64_t output_nr(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/permute_copy.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/permute_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..f450d3ce7b6beeb53562a7a7536c1d92b141eed0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/permute_copy.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::permute_copy(Tensor self, int[] dims) -> Tensor +inline at::Tensor permute_copy(const at::Tensor & self, at::IntArrayRef dims) { + return at::_ops::permute_copy::call(self, dims); +} + +// aten::permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & permute_copy_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dims) { + return at::_ops::permute_copy_out::call(self, dims, out); +} +// aten::permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & permute_copy_outf(const at::Tensor & self, at::IntArrayRef dims, at::Tensor & out) { + return at::_ops::permute_copy_out::call(self, dims, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/pin_memory.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/pin_memory.h new file mode 100644 index 0000000000000000000000000000000000000000..9faf0bcf64bbc6d8afdc00bff8b3ea875e5deced --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/pin_memory.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/pixel_unshuffle_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/pixel_unshuffle_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..93a2dd1dd04bcb44bcf97849efac7e5e0a8fc324 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/pixel_unshuffle_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & pixel_unshuffle_out(at::Tensor & out, const at::Tensor & self, int64_t downscale_factor); +TORCH_API at::Tensor & pixel_unshuffle_outf(const at::Tensor & self, int64_t downscale_factor, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ce642cbae71c9456208814d4a4dad363826e1612 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor poisson_nll_loss(const at::Tensor & input, const at::Tensor & target, bool log_input, bool full, double eps, int64_t reduction); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_native.h new file mode 100644 index 0000000000000000000000000000000000000000..363da3071604a27cbff262e7f60974eb5c22eda6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor poisson_nll_loss(const at::Tensor & input, const at::Tensor & target, bool log_input, bool full, double eps, int64_t reduction); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..88aaacb8de4986f9f845c13b10a19fd9cc0793d0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API poisson_nll_loss { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool, bool, double, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::poisson_nll_loss"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "poisson_nll_loss(Tensor input, Tensor target, bool log_input, bool full, float eps, int reduction) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & target, bool log_input, bool full, double eps, int64_t reduction); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & target, bool log_input, bool full, double eps, int64_t reduction); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/polar_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/polar_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..faef703cb37d1e492777a47087a44d8273143f96 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/polar_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor polar(const at::Tensor & abs, const at::Tensor & angle); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/polygamma_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/polygamma_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e363b27034b5d4a352f697dab7448945a50c66f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/polygamma_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor polygamma(int64_t n, const at::Tensor & self); +TORCH_API at::Tensor & polygamma_out(at::Tensor & out, int64_t n, const at::Tensor & self); +TORCH_API at::Tensor & polygamma_outf(int64_t n, const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/put.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/put.h new file mode 100644 index 0000000000000000000000000000000000000000..24873634ad47ca0acf8e24173e3756f5c9ca6d40 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/put.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor +inline at::Tensor put(const at::Tensor & self, const at::Tensor & index, const at::Tensor & source, bool accumulate=false) { + return at::_ops::put::call(self, index, source, accumulate); +} + +// aten::put.out(Tensor self, Tensor index, Tensor source, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & put_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & index, const at::Tensor & source, bool accumulate=false) { + return at::_ops::put_out::call(self, index, source, accumulate, out); +} +// aten::put.out(Tensor self, Tensor index, Tensor source, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & put_outf(const at::Tensor & self, const at::Tensor & index, const at::Tensor & source, bool accumulate, at::Tensor & out) { + return at::_ops::put_out::call(self, index, source, accumulate, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_scales_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_scales_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..44e28e469c2dfbe2c72edfd8c20147d57a0b8b05 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_scales_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & q_per_channel_scales_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & q_per_channel_scales_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_zero_points_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_zero_points_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..0fd82edca20e3c191fe37b3bb3b4ccfa2c649952 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_zero_points_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API q_per_channel_zero_points { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::q_per_channel_zero_points"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "q_per_channel_zero_points(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API q_per_channel_zero_points_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::q_per_channel_zero_points"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "q_per_channel_zero_points.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..02048b9f93bf988385d4e5a641f139693912e966 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_compositeimplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor quantile(const at::Tensor & self, const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_outf(const at::Tensor & self, const at::Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation, at::Tensor & out); +TORCH_API at::Tensor quantile(const at::Tensor & self, double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_out(at::Tensor & out, const at::Tensor & self, double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_outf(const at::Tensor & self, double q, ::std::optional dim, bool keepdim, c10::string_view interpolation, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3b754b9d1c6e5c20c5fba1ce1ba93a1bae651707 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor quantile(const at::Tensor & self, const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_out(const at::Tensor & self, const at::Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation, at::Tensor & out); +TORCH_API at::Tensor quantile(const at::Tensor & self, double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_out(const at::Tensor & self, double q, ::std::optional dim, bool keepdim, c10::string_view interpolation, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantized_max_pool2d_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantized_max_pool2d_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..eb2497741c0e83db20414667cc7a3cae7d8f389a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/quantized_max_pool2d_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & quantized_max_pool2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false); +TORCH_API at::Tensor & quantized_max_pool2d_outf(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rad2deg_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rad2deg_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7892900b3ad1318d4216297f79fabe5ce2cf1c68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rad2deg_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor rad2deg(const at::Tensor & self); +TORCH_API at::Tensor & rad2deg_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & rad2deg_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & rad2deg_(at::Tensor & self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1eee4a0d4296121dae320e5ad3330c170af8344c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_compositeexplicitautograd_dispatch.h @@ -0,0 +1,55 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional names, at::TensorOptions options={}); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, ::std::optional names, at::TensorOptions options={}); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor & rand_out(at::Tensor & out, at::IntArrayRef size, ::std::optional names); +TORCH_API at::Tensor & rand_outf(at::IntArrayRef size, ::std::optional names, at::Tensor & out); +TORCH_API at::Tensor & rand_symint_out(at::Tensor & out, c10::SymIntArrayRef size, ::std::optional names); +TORCH_API at::Tensor & rand_symint_outf(c10::SymIntArrayRef size, ::std::optional names, at::Tensor & out); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional generator, ::std::optional names, at::TensorOptions options={}); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional generator, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, at::TensorOptions options={}); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor & rand_out(at::Tensor & out, at::IntArrayRef size, ::std::optional generator, ::std::optional names); +TORCH_API at::Tensor & rand_outf(at::IntArrayRef size, ::std::optional generator, ::std::optional names, at::Tensor & out); +TORCH_API at::Tensor & rand_symint_out(at::Tensor & out, c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names); +TORCH_API at::Tensor & rand_symint_outf(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, at::Tensor & out); +TORCH_API at::Tensor rand(at::IntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor & rand_out(at::Tensor & out, at::IntArrayRef size); +TORCH_API at::Tensor & rand_outf(at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor & rand_symint_out(at::Tensor & out, c10::SymIntArrayRef size); +TORCH_API at::Tensor & rand_symint_outf(c10::SymIntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional generator, at::TensorOptions options={}); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, ::std::optional generator, at::TensorOptions options={}); +TORCH_API at::Tensor rand_symint(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..573380f7d78a445c872e74bb3061b0c716dde648 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_compositeexplicitautograd_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor rand_like(const at::Tensor & self, at::TensorOptions options={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); +TORCH_API at::Tensor & rand_like_out(at::Tensor & out, const at::Tensor & self, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & rand_like_outf(const at::Tensor & self, ::std::optional memory_format, at::Tensor & out); +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional generator, at::TensorOptions options={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); +TORCH_API at::Tensor & rand_like_out(at::Tensor & out, const at::Tensor & self, ::std::optional generator, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & rand_like_outf(const at::Tensor & self, ::std::optional generator, ::std::optional memory_format, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_native.h new file mode 100644 index 0000000000000000000000000000000000000000..82fafbabdece76db505fb3e70d24de05be4b73fa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & rand_like_out(const at::Tensor & self, ::std::optional memory_format, at::Tensor & out); +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional generator, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & rand_like_generator_out(const at::Tensor & self, ::std::optional generator, ::std::optional memory_format, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1df5786603a26d1fa1bd9d2d0a8ba6566a143d88 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rand_native.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional names, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +TORCH_API at::Tensor & rand_names_out_symint(c10::SymIntArrayRef size, ::std::optional names, at::Tensor & out); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional generator, ::std::optional names, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +TORCH_API at::Tensor & rand_generator_with_names_out_symint(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, at::Tensor & out); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +TORCH_API at::Tensor & rand_out(at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor & rand_out(at::IntArrayRef size, ::std::optional generator, at::Tensor & out); +TORCH_API at::Tensor rand(at::IntArrayRef size, ::std::optional generator, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/randn_like_compositeimplicitautogradnestedtensor_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/randn_like_compositeimplicitautogradnestedtensor_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c9611243a727d6d444b8cca7cabcf34a7f63b447 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/randn_like_compositeimplicitautogradnestedtensor_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautogradnestedtensor { + +TORCH_API at::Tensor randn_like(const at::Tensor & self, at::TensorOptions options={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor randn_like(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); +TORCH_API at::Tensor randn_like(const at::Tensor & self, ::std::optional generator, at::TensorOptions options={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor randn_like(const at::Tensor & self, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); + +} // namespace compositeimplicitautogradnestedtensor +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/random_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/random_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..445640235bba9b9b6842a2dbfd13829cea861848 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/random_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & random_(at::Tensor & self, int64_t from, ::std::optional to, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & random_(at::Tensor & self, int64_t to, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & random_(at::Tensor & self, ::std::optional generator=::std::nullopt); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3f2a27a767332ea7e36c8df5e0303908ff0fcf59 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor reciprocal(const at::Tensor & self); +TORCH_API at::Tensor & reciprocal_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cb1a4073af072b8560c1f9e32797a0e0e09ffb10 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor reciprocal(const at::Tensor & self); +TORCH_API at::Tensor & reciprocal_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & reciprocal_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & reciprocal_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c0ab0905b8ae242ff24579abd03c0490ec2c713f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_reciprocal_out : public at::meta::structured_reciprocal { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ec9f1fe863e48d674c55f0f79a444a5100c8a1be --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API reciprocal { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::reciprocal"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "reciprocal(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API reciprocal_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::reciprocal_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "reciprocal_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API reciprocal_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::reciprocal"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/record_stream.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/record_stream.h new file mode 100644 index 0000000000000000000000000000000000000000..1202e3103c4a8ec633b657477c3e76021a4337aa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/record_stream.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/record_stream_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/record_stream_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ac2b1ce7a26b255761dd233668849a317c9c471f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/record_stream_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void record_stream_cuda(at::Tensor & self, at::Stream s); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad1d_backward_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad1d_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..885f38dafdddb83dffab3a58762bf9965df092da --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad1d_backward_meta_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor reflection_pad1d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor reflection_pad1d_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & reflection_pad1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor & reflection_pad1d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding, at::Tensor & grad_input); +TORCH_API at::Tensor & reflection_pad1d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & reflection_pad1d_backward_symint_outf(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..22113b00e720a9fd332118285f29e41a76ce0dcf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor reflection_pad2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor reflection_pad2d_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & reflection_pad2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor & reflection_pad2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding, at::Tensor & grad_input); +TORCH_API at::Tensor & reflection_pad2d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & reflection_pad2d_backward_symint_outf(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..4ed0bff0a5bd7cf4589a9ae76fe7b5017538c855 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API reflection_pad2d_backward_grad_input { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::reflection_pad2d_backward"; + static constexpr const char* overload_name = "grad_input"; + static constexpr const char* schema_str = "reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input); +}; + +struct TORCH_API reflection_pad2d_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::reflection_pad2d_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "reflection_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f23b676568fa352b5054d7d4738cc02d8236f84a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor reflection_pad3d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor reflection_pad3d_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/relu_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/relu_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a2c7df7049181ec95ee2f8fa449cc45734e60de7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/relu_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor relu(const at::Tensor & self); +TORCH_API at::Tensor & relu_(at::Tensor & self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/remainder_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/remainder_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..4fa36e127c43f6a11807bac7d61fcae2c6efe419 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/remainder_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_remainder_Tensor : public TensorIteratorBase { + + + void meta(const at::Tensor & self, const at::Tensor & other); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/renorm_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/renorm_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..61753c283a599c2cd4a86e3e8a7f68e1b73d2f93 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/renorm_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_renorm : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cc147f9d936511ed4428de23c6f36fd72d316723 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor replication_pad1d(const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor replication_pad1d_symint(const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_outf(const at::Tensor & self, at::IntArrayRef padding, at::Tensor & out); +TORCH_API at::Tensor & replication_pad1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..84d197f96f510c600f4d1267473538547915e348 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_meta_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor replication_pad1d(const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor replication_pad1d_symint(const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_outf(const at::Tensor & self, at::IntArrayRef padding, at::Tensor & out); +TORCH_API at::Tensor & replication_pad1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f164f8fa9ff42fe01e49026f41eccf132d491f6b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API replication_pad1d_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::replication_pad1d"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "replication_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & out); +}; + +struct TORCH_API replication_pad1d { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::replication_pad1d"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "replication_pad1d(Tensor self, SymInt[2] padding) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef padding); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad3d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..159726d6aa77a6f6fdad83051ba62de67f8ddd97 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad3d_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_replication_pad3d_out_cpu : public at::meta::structured_replication_pad3d { +void impl(const at::Tensor & self, at::ArrayRef padding, const at::Tensor & out); +}; +struct TORCH_API structured_replication_pad3d_out_cuda : public at::meta::structured_replication_pad3d { +void impl(const at::Tensor & self, at::ArrayRef padding, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reshape_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reshape_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6288dbfc4a1d0c8d00909c947358586393a9f7ca --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/reshape_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor reshape(const at::Tensor & self, at::IntArrayRef shape); +TORCH_API at::Tensor reshape_symint(const at::Tensor & self, c10::SymIntArrayRef shape); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_as_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_as_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d10297b922bcdb8ed04768fe95dc7e895ba91b39 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_as_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor resize_as(const at::Tensor & self, const at::Tensor & the_template, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_as_out(const at::Tensor & self, const at::Tensor & the_template, ::std::optional memory_format, const at::Tensor & out); +TORCH_API const at::Tensor & resize_as_(const at::Tensor & self, const at::Tensor & the_template, ::std::optional memory_format=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b238316406372af2c656b885a7d999930525cb60 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor resize(const at::Tensor & self, at::IntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor resize_symint(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_out(const at::Tensor & out, const at::Tensor & self, at::IntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_outf(const at::Tensor & self, at::IntArrayRef size, ::std::optional memory_format, const at::Tensor & out); +TORCH_API const at::Tensor & resize_symint_out(const at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format, const at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c1f51b8d6d33edf26ac0a2024c11986b20d9fd8c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/resize_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API resize_ { + using schema = const at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::resize_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!)"; + static const at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format); + static const at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format); +}; + +struct TORCH_API resize_out { + using schema = const at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, ::std::optional, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::resize"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "resize.out(Tensor self, SymInt[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)"; + static const at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format, const at::Tensor & out); + static const at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format, const at::Tensor & out); +}; + +struct TORCH_API resize { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::resize"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "resize(Tensor self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/result_type_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/result_type_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1c2872d7b9cb61c80bebd2d81d9162f2f9a62a52 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/result_type_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::ScalarType result_type(const at::Tensor & tensor, const at::Tensor & other); +TORCH_API at::ScalarType result_type(const at::Tensor & tensor, const at::Scalar & other); +TORCH_API at::ScalarType result_type(const at::Scalar & scalar, const at::Tensor & tensor); +TORCH_API at::ScalarType result_type(const at::Scalar & scalar1, const at::Scalar & scalar2); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/retains_grad_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/retains_grad_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8368a03bee3f968dfb2bd9a0f9905be7aec1425e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/retains_grad_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool retains_grad(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h new file mode 100644 index 0000000000000000000000000000000000000000..fe699967fc8bec490a92aae59233e8f43c46f744 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor rnn_relu_cell(const at::Tensor & input, const at::Tensor & hx, const at::Tensor & w_ih, const at::Tensor & w_hh, const ::std::optional & b_ih={}, const ::std::optional & b_hh={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2d28ec33460d095e3c9a7e2c54d4c6f37bd7c148 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor roll(const at::Tensor & self, at::IntArrayRef shifts, at::IntArrayRef dims={}); +TORCH_API at::Tensor roll_symint(const at::Tensor & self, c10::SymIntArrayRef shifts, at::IntArrayRef dims={}); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3b4ee18904cb8919e395d1021b8e388ead5aeaef --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & roll_out_symint(const at::Tensor & self, c10::SymIntArrayRef shifts, at::IntArrayRef dims, at::Tensor & out); +TORCH_API at::Tensor roll(const at::Tensor & self, at::IntArrayRef shifts, at::IntArrayRef dims={}); +TORCH_API at::Tensor roll_cuda(const at::Tensor & self, at::IntArrayRef shifts, at::IntArrayRef dims={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8077c42cd2761b83c955866bc4a0f6ccd3ddd5d9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/roll_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API roll { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::roll"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef shifts, at::IntArrayRef dims); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef shifts, at::IntArrayRef dims); +}; + +struct TORCH_API roll_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::roll"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "roll.out(Tensor self, SymInt[1] shifts, int[1] dims=[], *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef shifts, at::IntArrayRef dims, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef shifts, at::IntArrayRef dims, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..035c6298f200f0624afe6669740dd23d26e1c2fa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor __rshift__(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & __irshift__(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor __rshift__(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & __irshift__(at::Tensor & self, const at::Tensor & other); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bcd54c36f7ea02f74c04922720853ae66d1fcf95 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cuda_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor __rshift__(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & __irshift__(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor __rshift__(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & __irshift__(at::Tensor & self, const at::Tensor & other); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rsub_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rsub_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..89ecf79da3dcdade71e3d0ae97eb39be1e283514 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/rsub_ops.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API rsub_Tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); +}; + +struct TORCH_API rsub_Scalar { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Scalar"; + static constexpr const char* schema_str = "rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha); +}; + +struct TORCH_API rsub_Tensor_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Tensor_out"; + static constexpr const char* schema_str = "rsub.Tensor_out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out); +}; + +struct TORCH_API rsub_Scalar_out { + using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Scalar_out"; + static constexpr const char* schema_str = "rsub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/select.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/select.h new file mode 100644 index 0000000000000000000000000000000000000000..62c333ebabcb1802f22750b15bb3d3b1bfdfc28e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/select.h @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a) +inline at::Tensor select(const at::Tensor & self, at::Dimname dim, int64_t index) { + return at::_ops::select_Dimname::call(self, dim, index); +} + +// aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a) +inline at::Tensor select(const at::Tensor & self, int64_t dim, int64_t index) { + return at::_ops::select_int::call(self, dim, index); +} +namespace symint { + template >> + at::Tensor select(const at::Tensor & self, int64_t dim, int64_t index) { + return at::_ops::select_int::call(self, dim, index); + } +} + +// aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a) +inline at::Tensor select_symint(const at::Tensor & self, int64_t dim, c10::SymInt index) { + return at::_ops::select_int::call(self, dim, index); +} +namespace symint { + template >> + at::Tensor select(const at::Tensor & self, int64_t dim, c10::SymInt index) { + return at::_ops::select_int::call(self, dim, index); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/select_scatter_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/select_scatter_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a4b3c546c1bd412a8c667a8d2cb92fe60032f85a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/select_scatter_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & select_scatter_out_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim, c10::SymInt index, at::Tensor & out); +TORCH_API at::Tensor select_scatter_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim, c10::SymInt index); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sign_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sign_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..af9936e1059bba2de8ec47b37ffa2fa2c7d2706c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sign_cuda_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor sign(const at::Tensor & self); +TORCH_API at::Tensor & sign_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & sign_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & sign_(at::Tensor & self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/signbit_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/signbit_native.h new file mode 100644 index 0000000000000000000000000000000000000000..9087c6be0fc1802066052a717fe9aeb2e543468c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/signbit_native.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_signbit_out : public at::meta::structured_signbit { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +TORCH_API at::Tensor signbit_sparse(const at::Tensor & self); +TORCH_API at::Tensor & signbit_sparse_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor signbit_sparse_csr(const at::Tensor & self); +TORCH_API at::Tensor & signbit_sparse_csr_out(const at::Tensor & self, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/silu_backward_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/silu_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b41bed0b1a0c62642a8f48491425da42ad211df7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/silu_backward_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor silu_backward(const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API at::Tensor & silu_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API at::Tensor & silu_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sin_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sin_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..88511c03bcdb5d93612883302f29957ab2d1b904 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sin_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor sin(const at::Tensor & self); +TORCH_API at::Tensor & sin_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & sin_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & sin_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sin_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sin_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..455af4907f781d5bad20cade92879dc06364ecb2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sin_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_sin : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sinh_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sinh_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b5b8b9c03a532d72fabcfd4bf26de8a510e12d94 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sinh_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor sinh(const at::Tensor & self); +TORCH_API at::Tensor & sinh_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & sinh_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & sinh_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slice_inverse_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slice_inverse_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3baceec109bed7623b77493dc646052ecf9d54c0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slice_inverse_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor slice_inverse_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slice_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slice_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..aa85c91ca1a3a05160c27bfbb3ea7634f5f4190a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slice_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API slice_Tensor { + using schema = at::Tensor (const at::Tensor &, int64_t, ::std::optional, ::std::optional, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::slice"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv3d_forward_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv3d_forward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..81780890c21ed93a5a76fc5a448f00adbf6580a8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv3d_forward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API slow_conv3d_forward_output { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::slow_conv3d_forward"; + static constexpr const char* overload_name = "output"; + static constexpr const char* schema_str = "slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output); +}; + +struct TORCH_API slow_conv3d_forward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::slow_conv3d_forward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv_transpose3d_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv_transpose3d_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ff741231832496136b65e7816fa05edbdda13448 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv_transpose3d_cuda_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor slow_conv_transpose3d(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias={}, at::IntArrayRef stride=1, at::IntArrayRef padding=0, at::IntArrayRef output_padding=0, at::IntArrayRef dilation=1); +TORCH_API at::Tensor slow_conv_transpose3d_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias={}, c10::SymIntArrayRef stride=c10::SymInt(1), c10::SymIntArrayRef padding=c10::SymInt(0), c10::SymIntArrayRef output_padding=c10::SymInt(0), c10::SymIntArrayRef dilation=c10::SymInt(1)); +TORCH_API at::Tensor & slow_conv_transpose3d_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias={}, at::IntArrayRef stride=1, at::IntArrayRef padding=0, at::IntArrayRef output_padding=0, at::IntArrayRef dilation=1); +TORCH_API at::Tensor & slow_conv_transpose3d_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef dilation, at::Tensor & out); +TORCH_API at::Tensor & slow_conv_transpose3d_symint_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias={}, c10::SymIntArrayRef stride=c10::SymInt(1), c10::SymIntArrayRef padding=c10::SymInt(0), c10::SymIntArrayRef output_padding=c10::SymInt(0), c10::SymIntArrayRef dilation=c10::SymInt(1)); +TORCH_API at::Tensor & slow_conv_transpose3d_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef dilation, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv_transpose3d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv_transpose3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..58be7955f067343943a1fd1be8adbd93c86bab7d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/slow_conv_transpose3d_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor slow_conv_transpose3d_cpu(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias={}, at::IntArrayRef stride=1, at::IntArrayRef padding=0, at::IntArrayRef output_padding=0, at::IntArrayRef dilation=1); +TORCH_API at::Tensor & slow_conv_transpose3d_out_cpu(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef dilation, at::Tensor & out); +TORCH_API at::Tensor slow_conv_transpose3d_cuda(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias={}, at::IntArrayRef stride=1, at::IntArrayRef padding=0, at::IntArrayRef output_padding=0, at::IntArrayRef dilation=1); +TORCH_API at::Tensor & slow_conv_transpose3d_out_cuda(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef dilation, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/smooth_l1_loss_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/smooth_l1_loss_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8bf02eb52801ac8baba338bc53ddbd0e576ead17 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/smooth_l1_loss_backward_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor & smooth_l1_loss_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta); +TORCH_API at::Tensor & smooth_l1_loss_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/soft_margin_loss.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/soft_margin_loss.h new file mode 100644 index 0000000000000000000000000000000000000000..e4ee3a982f8894c587a227cb20fbde13cc1dcd95 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/soft_margin_loss.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::soft_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & soft_margin_loss_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & target, int64_t reduction=at::Reduction::Mean) { + return at::_ops::soft_margin_loss_out::call(self, target, reduction, out); +} +// aten::soft_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & soft_margin_loss_outf(const at::Tensor & self, const at::Tensor & target, int64_t reduction, at::Tensor & out) { + return at::_ops::soft_margin_loss_out::call(self, target, reduction, out); +} + +// aten::soft_margin_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor +inline at::Tensor soft_margin_loss(const at::Tensor & self, const at::Tensor & target, int64_t reduction=at::Reduction::Mean) { + return at::_ops::soft_margin_loss::call(self, target, reduction); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softmax_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softmax_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..23776af941254972c4de69965d66f60d4414d13d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softmax_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & softmax_outf(const at::Tensor & self, int64_t dim, ::std::optional dtype, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softplus_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softplus_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..afc38a5c7b9bd6fc88e57c03ffbb40f273dbb83e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softplus_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor softplus(const at::Tensor & self, const at::Scalar & beta=1, const at::Scalar & threshold=20); +TORCH_API at::Tensor & softplus_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & beta=1, const at::Scalar & threshold=20); +TORCH_API at::Tensor & softplus_outf(const at::Tensor & self, const at::Scalar & beta, const at::Scalar & threshold, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3e0efb7642988d25e209b86ed1ac47a85d8416d4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor softshrink_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..d5e420cf33573cffa8007c1e0b843db4d12b72db --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_softshrink_backward : public TensorIteratorBase { + + + void meta(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..216942db7241a5dfc7d881094da2ed0e70ec1e58 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor softshrink_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8b95e2e26b90241ed81e8a5f7b4089e15f9160cb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor sparse_coo_tensor(at::IntArrayRef size, at::TensorOptions options); +TORCH_API at::Tensor sparse_coo_tensor(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor & sparse_coo_tensor_out(at::Tensor & out, at::IntArrayRef size); +TORCH_API at::Tensor & sparse_coo_tensor_outf(at::IntArrayRef size, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8e6585cc401f58a147dbed89f2239b165028994b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt); +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt); +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_airy_ai_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_airy_ai_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..555afabd0512a0388f4262153916207dce72bcd4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_airy_ai_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_airy_ai(const at::Tensor & x); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_airy_ai_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_airy_ai_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..7031ad540f6474ae9161cb0c2df9c1b1a93853f0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_airy_ai_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_airy_ai { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_airy_ai"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "special_airy_ai(Tensor x) -> Tensor"; + static at::Tensor call(const at::Tensor & x); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x); +}; + +struct TORCH_API special_airy_ai_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_airy_ai"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "special_airy_ai.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & x, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j0_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j0_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..699771e1ecef5ddd145efae31370f387606e37b9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j0_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor special_bessel_j0(const at::Tensor & self); +TORCH_API at::Tensor & special_bessel_j0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_bessel_j0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j1_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j1_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..5c7c7d4fdf67bfacd3f80e72db80cb0d965a9fc7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j1_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_bessel_j1(const at::Tensor & self); +TORCH_API at::Tensor & special_bessel_j1_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_bessel_j1_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j1_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j1_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ba34562c63782f10c7dbe6927fe3a9bc14d96ec9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_j1_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor special_bessel_j1(const at::Tensor & self); +TORCH_API at::Tensor & special_bessel_j1_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_bessel_j1_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9a0a7525b0dacf1444ff5cf944042cb0ab9b6315 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_bessel_y1(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..d2749cc892506f6fb92732518d455941f04b1450 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_special_bessel_y1 : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v.h new file mode 100644 index 0000000000000000000000000000000000000000..79d3d96f1a6c25df2dc481bd804614370c88b3c4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v.h @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor +inline at::Tensor special_chebyshev_polynomial_v(const at::Tensor & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v::call(x, n); +} + +// aten::special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor +inline at::Tensor special_chebyshev_polynomial_v(const at::Scalar & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v_x_scalar::call(x, n); +} + +// aten::special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor +inline at::Tensor special_chebyshev_polynomial_v(const at::Tensor & x, const at::Scalar & n) { + return at::_ops::special_chebyshev_polynomial_v_n_scalar::call(x, n); +} + +// aten::special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v_out::call(x, n, out); +} +// aten::special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out) { + return at::_ops::special_chebyshev_polynomial_v_out::call(x, n, out); +} + +// aten::special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Scalar & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v_x_scalar_out::call(x, n, out); +} +// aten::special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_outf(const at::Scalar & x, const at::Tensor & n, at::Tensor & out) { + return at::_ops::special_chebyshev_polynomial_v_x_scalar_out::call(x, n, out); +} + +// aten::special_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Scalar & n) { + return at::_ops::special_chebyshev_polynomial_v_n_scalar_out::call(x, n, out); +} +// aten::special_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Scalar & n, at::Tensor & out) { + return at::_ops::special_chebyshev_polynomial_v_n_scalar_out::call(x, n, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..af6618fad256c28477f76a5195f60f0ad8a9d87b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_chebyshev_polynomial_v(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_native.h new file mode 100644 index 0000000000000000000000000000000000000000..049de911d10d0d491fa0a7bf0840da1f32348f7f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_native.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_special_chebyshev_polynomial_v_out : public at::meta::structured_special_chebyshev_polynomial_v { +void impl(const at::Tensor & x, const at::Tensor & n, const at::Tensor & out); +}; +TORCH_API at::Tensor special_chebyshev_polynomial_v(const at::Scalar & x, const at::Tensor & n); +TORCH_API at::Tensor & special_chebyshev_polynomial_v_out(const at::Scalar & x, const at::Tensor & n, at::Tensor & out); +TORCH_API at::Tensor special_chebyshev_polynomial_v(const at::Tensor & x, const at::Scalar & n); +TORCH_API at::Tensor & special_chebyshev_polynomial_v_out(const at::Tensor & x, const at::Scalar & n, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_digamma.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_digamma.h new file mode 100644 index 0000000000000000000000000000000000000000..2b5832f6d64bc27ca1319ff7f3f704119def8117 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_digamma.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_digamma(Tensor self) -> Tensor +inline at::Tensor special_digamma(const at::Tensor & self) { + return at::_ops::special_digamma::call(self); +} + +// aten::special_digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_digamma_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::special_digamma_out::call(self, out); +} +// aten::special_digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_digamma_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::special_digamma_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_erf.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_erf.h new file mode 100644 index 0000000000000000000000000000000000000000..e914143f78369923612869201599f6b2c364e0a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_erf.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_erf(Tensor self) -> Tensor +inline at::Tensor special_erf(const at::Tensor & self) { + return at::_ops::special_erf::call(self); +} + +// aten::special_erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_erf_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::special_erf_out::call(self, out); +} +// aten::special_erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_erf_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::special_erf_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_erfcx_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_erfcx_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b4f7bd67d6e8ea0b853bd639ae3bb0445642c784 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_erfcx_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_erfcx(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_expit_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_expit_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d4617c9be68ece00806781b55d07d43f416b3401 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_expit_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor special_expit(const at::Tensor & self); +TORCH_API at::Tensor & special_expit_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_expit_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_hermite_polynomial_h_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_hermite_polynomial_h_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..48aa9ed4872e6dcd537c1c508c206291773174d1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_hermite_polynomial_h_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_hermite_polynomial_h(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_hermite_polynomial_h_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_hermite_polynomial_h_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i0_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i0_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..34ecd134e870e20fe1d70f09da6ee21aa129d305 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i0_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor special_i0(const at::Tensor & self); +TORCH_API at::Tensor & special_i0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_i0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a716d7d3439650d2c2b59628a59ce4872925c2ee --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_special_i1_out : public at::meta::structured_special_i1 { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1e_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1e_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a7c1ff8b20fe3c93a56aef7c38b00a894d53629f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1e_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_i1e { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_i1e"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "special_i1e(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API special_i1e_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_i1e"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "special_i1e.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_log_ndtr_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_log_ndtr_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f97949f06ed009f09a4404000d5141096bc1aad5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_log_ndtr_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_log_ndtr { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_log_ndtr"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "special_log_ndtr(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API special_log_ndtr_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_log_ndtr"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "special_log_ndtr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i0_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i0_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..df6efb5cacb85209e44e26a8445e1453110b3727 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i0_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_modified_bessel_i0(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_i0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_i0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i1.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i1.h new file mode 100644 index 0000000000000000000000000000000000000000..ff1fd407e44f94d69627f9f8cf32c6da6f2f4c02 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i1.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_modified_bessel_i1(Tensor self) -> Tensor +inline at::Tensor special_modified_bessel_i1(const at::Tensor & self) { + return at::_ops::special_modified_bessel_i1::call(self); +} + +// aten::special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_modified_bessel_i1_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::special_modified_bessel_i1_out::call(self, out); +} +// aten::special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_modified_bessel_i1_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::special_modified_bessel_i1_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..78b59fde2b591306e34d536b844c7acb83b11068 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor special_modified_bessel_k0(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..493b3e678de882fb5305fc263146a03b1f568078 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_special_modified_bessel_k0 : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..127ef550df91dd5ade1287adeed7a7e8f867ac6d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_modified_bessel_k1(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0106d6a3db020ca8f61d3cf16a010be753573712 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_modified_bessel_k1(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_ndtri_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_ndtri_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8b6851bd72d42a77c553fc53d36fbc26826eab13 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_ndtri_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_ndtri(const at::Tensor & self); +TORCH_API at::Tensor & special_ndtri_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_ndtri_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_psi_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_psi_native.h new file mode 100644 index 0000000000000000000000000000000000000000..51507986a24c9c0ff92e58d65624bcb651624ac0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_psi_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor special_psi(const at::Tensor & self); +TORCH_API at::Tensor & special_psi_out(const at::Tensor & self, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_scaled_modified_bessel_k0_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_scaled_modified_bessel_k0_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..181f5dd7f7e54959fe7247b47761bb2e046ce3c9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_scaled_modified_bessel_k0_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_scaled_modified_bessel_k0(const at::Tensor & x); +TORCH_API at::Tensor & special_scaled_modified_bessel_k0_out(at::Tensor & out, const at::Tensor & x); +TORCH_API at::Tensor & special_scaled_modified_bessel_k0_outf(const at::Tensor & x, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_t_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_t_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2d0c52cdba55a8d6654d4755dad61cce5a2c2716 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_t_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_shifted_chebyshev_polynomial_t(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_t_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_t_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_v_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_v_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..424a0d003df3216964d682d7577654626cf0560e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_v_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_shifted_chebyshev_polynomial_v(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f759ffc8d155e6859c1e56c74f392a202c2ca9b1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_shifted_chebyshev_polynomial_w(const at::Tensor & x, const at::Tensor & n); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..e881bb64c9a9cd3bd09a5ef904d5986cd243be79 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_special_shifted_chebyshev_polynomial_w : public TensorIteratorBase { + + + void meta(const at::Tensor & x, const at::Tensor & n); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..44145124eb42944a96f5c7871d5bc5cfb7a6b7cd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_shifted_chebyshev_polynomial_w(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_w_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_w_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6a922dae1dffad2cf24958fbb22b3f09e07a76ba --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_ops.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_shifted_chebyshev_polynomial_w { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor"; + static at::Tensor call(const at::Tensor & x, const at::Tensor & n); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Tensor & n); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_x_scalar { + using schema = at::Tensor (const at::Scalar &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "x_scalar"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor"; + static at::Tensor call(const at::Scalar & x, const at::Tensor & n); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Scalar & x, const at::Tensor & n); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_n_scalar { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "n_scalar"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor"; + static at::Tensor call(const at::Tensor & x, const at::Scalar & n); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Scalar & n); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Tensor & n, at::Tensor & out); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_x_scalar_out { + using schema = at::Tensor & (const at::Scalar &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "x_scalar_out"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Scalar & x, const at::Tensor & n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Scalar & x, const at::Tensor & n, at::Tensor & out); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_n_scalar_out { + using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "n_scalar_out"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & x, const at::Scalar & n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Scalar & n, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_sinc_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_sinc_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bece7b2957f86b7f622087d22c098b441c473335 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_sinc_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor special_sinc(const at::Tensor & self); +TORCH_API at::Tensor & special_sinc_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_sinc_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..df455651be1915e78672fc427ab99bdc20537822 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor special_zeta(const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor special_zeta(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & special_zeta_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & special_zeta_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9d4021e8141df70062115287e9e0fa3c62264184 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_zeta(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/split_with_sizes.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/split_with_sizes.h new file mode 100644 index 0000000000000000000000000000000000000000..68ea4c662284118f258359c6eac7a00698bfba97 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/split_with_sizes.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector split_with_sizes(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, c10::fromIntArrayRefSlow(split_sizes), dim); +} +namespace symint { + template >> + ::std::vector split_with_sizes(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, c10::fromIntArrayRefSlow(split_sizes), dim); + } +} + +// aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector split_with_sizes_symint(const at::Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, split_sizes, dim); +} +namespace symint { + template >> + ::std::vector split_with_sizes(const at::Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, split_sizes, dim); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sqrt_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sqrt_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3d68e8878bbcf1cf7baf723f8d658a7c8452080e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sqrt_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API sqrt { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sqrt"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "sqrt(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API sqrt_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sqrt_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "sqrt_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API sqrt_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sqrt"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/square_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/square_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e0894cd9c197045848ae054d8188b0a3b69b8b5c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/square_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor square(const at::Tensor & self); +TORCH_API at::Tensor & square_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & square_(at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sspaddmm_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sspaddmm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..11f6b274a7d4680508a2ebb7d8254dabfc42307c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sspaddmm_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor & sspaddmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & sspaddmm_outf(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/stft.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/stft.h new file mode 100644 index 0000000000000000000000000000000000000000..813e9cbc0be7c87f861411981bbe781e959d92fa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/stft.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor +inline at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) { + return at::_ops::stft::call(self, n_fft, hop_length, win_length, window, normalized, onesided, return_complex, align_to_window); +} + +// aten::stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor +inline at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, c10::string_view pad_mode="reflect", bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) { + return at::_ops::stft_center::call(self, n_fft, hop_length, win_length, window, center, pad_mode, normalized, onesided, return_complex, align_to_window); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/stft_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/stft_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d8b9ac858db24721e1b5dd67b7d360776e30df96 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/stft_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt); +TORCH_API at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, c10::string_view pad_mode="reflect", bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sub_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sub_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6692b67be467ddebfec8c36ceca56e4ac4bafc08 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sub_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor sub(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); +TORCH_API at::Tensor & sub_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); +TORCH_API at::Tensor & sub_outf(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out); +TORCH_API at::Tensor & sub_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sub_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sub_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1bd8112b150f57cb27f15817f3e15113e92e26b3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sub_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor sub(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); +TORCH_API at::Tensor & sub_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); +TORCH_API at::Tensor & sub_outf(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out); +TORCH_API at::Tensor & sub_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sum_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sum_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f63d7cf1ad6e91ed1e7656ae9309ea677595ca27 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sum_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & sum_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & sum_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/svd_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/svd_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..fb362b3a39b83b0c2c2a1501577d3afda0453d76 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/svd_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API svd_U { + using schema = ::std::tuple (const at::Tensor &, bool, bool, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::svd"; + static constexpr const char* overload_name = "U"; + static constexpr const char* schema_str = "svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)"; + static ::std::tuple call(const at::Tensor & self, bool some, bool compute_uv, at::Tensor & U, at::Tensor & S, at::Tensor & V); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool some, bool compute_uv, at::Tensor & U, at::Tensor & S, at::Tensor & V); +}; + +struct TORCH_API svd { + using schema = ::std::tuple (const at::Tensor &, bool, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::svd"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)"; + static ::std::tuple call(const at::Tensor & self, bool some, bool compute_uv); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool some, bool compute_uv); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapaxes_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapaxes_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2a103a66b7524213774793303834ec3fb580f39f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapaxes_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor swapaxes(const at::Tensor & self, int64_t axis0, int64_t axis1); +TORCH_API at::Tensor & swapaxes_(at::Tensor & self, int64_t axis0, int64_t axis1); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapaxes_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapaxes_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a13d2c95cf61954a3ec134090e1b370c33be0121 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapaxes_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API swapaxes { + using schema = at::Tensor (const at::Tensor &, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::swapaxes"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, int64_t axis0, int64_t axis1); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t axis0, int64_t axis1); +}; + +struct TORCH_API swapaxes_ { + using schema = at::Tensor & (at::Tensor &, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::swapaxes_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "swapaxes_(Tensor(a!) self, int axis0, int axis1) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, int64_t axis0, int64_t axis1); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, int64_t axis0, int64_t axis1); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapdims.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapdims.h new file mode 100644 index 0000000000000000000000000000000000000000..fcb9bf3e777032c829e2cdc3f1928a7cb3df59c9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/swapdims.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a) +inline at::Tensor swapdims(const at::Tensor & self, int64_t dim0, int64_t dim1) { + return at::_ops::swapdims::call(self, dim0, dim1); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_constrain_range_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_constrain_range_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..89201e1f5651efd434208a5523f14d53f50e3bf3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_constrain_range_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API void sym_constrain_range(const at::Scalar & size, ::std::optional min=::std::nullopt, ::std::optional max=::std::nullopt); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_is_contiguous_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_is_contiguous_native.h new file mode 100644 index 0000000000000000000000000000000000000000..23d4d50729290acad14c7102c8a1a513aeb5635f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_is_contiguous_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API c10::SymBool sym_is_contiguous(const at::Tensor & self, at::MemoryFormat memory_format=c10::MemoryFormat::Contiguous); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fb006b36caa729d58e415f802ae2f7e946ff0211 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API c10::SymInt sym_numel(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..29c405ebfb1d4ea983134d625e0017fb80b8f1ce --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API sym_numel { + using schema = c10::SymInt (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sym_numel"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "sym_numel(Tensor self) -> SymInt"; + static c10::SymInt call(const at::Tensor & self); + static c10::SymInt redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9df744d7360926503994ed92d834e1c216bc5f00 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API c10::SymInt sym_stride(const at::Tensor & self, int64_t dim); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_native.h new file mode 100644 index 0000000000000000000000000000000000000000..279fa220f89888f80b525ace9401c8606dedac44 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API c10::SymInt sym_stride(const at::Tensor & self, int64_t dim); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/take.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/take.h new file mode 100644 index 0000000000000000000000000000000000000000..182131798de6673c3234bcc7827ede09fbebdb79 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/take.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::take.out(Tensor self, Tensor index, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & take_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & index) { + return at::_ops::take_out::call(self, index, out); +} +// aten::take.out(Tensor self, Tensor index, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & take_outf(const at::Tensor & self, const at::Tensor & index, at::Tensor & out) { + return at::_ops::take_out::call(self, index, out); +} + +// aten::take(Tensor self, Tensor index) -> Tensor +inline at::Tensor take(const at::Tensor & self, const at::Tensor & index) { + return at::_ops::take::call(self, index); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tan_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tan_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..aeed362cbccd673b85bd46ec58fedd7a14d29d22 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tan_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_tan : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tan_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tan_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..69d098be3bcd9b739341838a50684de6a75a41ff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tan_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API tan { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::tan"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "tan(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API tan_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::tan_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "tan_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API tan_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::tan"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..36778a5ae6b21778f24a495c2710ee2e9ec14a5f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor tanh(const at::Tensor & self); +TORCH_API at::Tensor & tanh_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d3a895b128ef051143517fd139efb1f24355bd08 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor tanh(const at::Tensor & self); +TORCH_API at::Tensor & tanh_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & tanh_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & tanh_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tile_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tile_native.h new file mode 100644 index 0000000000000000000000000000000000000000..909e71df7f70bfc96976628c2bf0c692a0d94b28 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/tile_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor tile_symint(const at::Tensor & self, c10::SymIntArrayRef dims); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_dense_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_dense_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6bb78f8a32c655c6cf6bfc0f7a6a5e2aad330dc6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_dense_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor to_dense_backward(const at::Tensor & grad, const at::Tensor & input, ::std::optional masked_grad=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_padded_tensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_padded_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..764536ea189c74634443a5046ff8f7309b2a1f17 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_padded_tensor.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +namespace symint { + template >> + at::Tensor to_padded_tensor(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt); + } +} + +namespace symint { + template >> + at::Tensor to_padded_tensor(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor::call(self, padding, output_size); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_outf(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_outf(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_symint_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_symint_outf(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_outf(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_bsc.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_bsc.h new file mode 100644 index 0000000000000000000000000000000000000000..58e2fbf5e2fa59e2adce0f4dc410540304f6b434 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_bsc.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_bsr_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_bsr_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b0e4eda21e86a5f07c4500d4cffded2d248555fa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_bsr_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor to_sparse_bsr(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..30a3c4d3f7365c6b5ed5a07857c29de8f273541a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor to_sparse(const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor to_sparse(const at::Tensor & self, ::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/topk_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/topk_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3a8f54e1eb91c405493aaeea1175c82119de161a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/topk_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple topk(const at::Tensor & self, int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_symint(const at::Tensor & self, c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_outf(const at::Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted, at::Tensor & values, at::Tensor & indices); +TORCH_API ::std::tuple topk_symint_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_symint_outf(const at::Tensor & self, c10::SymInt k, int64_t dim, bool largest, bool sorted, at::Tensor & values, at::Tensor & indices); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/trunc_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/trunc_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6b20fa2645f3ad068e9a099094128371c279981d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/trunc_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor trunc(const at::Tensor & self); +TORCH_API at::Tensor & trunc_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & trunc_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & trunc_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind.h new file mode 100644 index 0000000000000000000000000000000000000000..c39cd96810859ddeec1e3feb883843be951e7b2f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[] +inline ::std::vector unbind(const at::Tensor & self, int64_t dim=0) { + return at::_ops::unbind_int::call(self, dim); +} + +// aten::unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[] +inline ::std::vector unbind(const at::Tensor & self, at::Dimname dim) { + return at::_ops::unbind_Dimname::call(self, dim); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..08691cf980080a3d24c4d6cebb922addb9aa2653 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector unbind(const at::Tensor & self, int64_t dim=0); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind_copy_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind_copy_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8502cb867efff450442694da161bd785fae7fc62 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unbind_copy_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API ::std::vector unbind_copy(const at::Tensor & self, int64_t dim=0); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unflatten_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unflatten_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c8492037c22d847d2ed7aa94b6d59be9f18e7b8e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unflatten_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API unflatten_int { + using schema = at::Tensor (const at::Tensor &, int64_t, c10::SymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::unflatten"; + static constexpr const char* overload_name = "int"; + static constexpr const char* schema_str = "unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, int64_t dim, c10::SymIntArrayRef sizes); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, c10::SymIntArrayRef sizes); +}; + +struct TORCH_API unflatten_Dimname { + using schema = at::Tensor (const at::Tensor &, at::Dimname, c10::SymIntArrayRef, at::DimnameList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::unflatten"; + static constexpr const char* overload_name = "Dimname"; + static constexpr const char* schema_str = "unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unfold_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unfold_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..12081d1c029a9ed0f455f9f068d4503b22bcbb92 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unfold_backward.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor +inline at::Tensor unfold_backward(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step); +} +namespace symint { + template >> + at::Tensor unfold_backward(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step); + } +} + +// aten::unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor +inline at::Tensor unfold_backward_symint(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward::call(grad_in, input_sizes, dim, size, step); +} +namespace symint { + template >> + at::Tensor unfold_backward(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward::call(grad_in, input_sizes, dim, size, step); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & unfold_backward_out(at::Tensor & out, const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_out(at::Tensor & out, const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & unfold_backward_outf(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_outf(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & unfold_backward_symint_out(at::Tensor & out, const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_out(at::Tensor & out, const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & unfold_backward_symint_outf(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_outf(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/uniform_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/uniform_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7ae6ca9f91f9bc4639f81af847a8621ced9d3e66 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/uniform_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor uniform(const at::Tensor & self, double from=0, double to=1, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & uniform_out(at::Tensor & out, const at::Tensor & self, double from=0, double to=1, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & uniform_outf(const at::Tensor & self, double from, double to, ::std::optional generator, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unique_consecutive.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unique_consecutive.h new file mode 100644 index 0000000000000000000000000000000000000000..4aa712912a00128867cceedc789833c0dda2e747 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unique_consecutive.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::unique_consecutive(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None) -> (Tensor, Tensor, Tensor) +inline ::std::tuple unique_consecutive(const at::Tensor & self, bool return_inverse=false, bool return_counts=false, ::std::optional dim=::std::nullopt) { + return at::_ops::unique_consecutive::call(self, return_inverse, return_counts, dim); +} + +// aten::unique_consecutive.out(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple unique_consecutive_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & self, bool return_inverse=false, bool return_counts=false, ::std::optional dim=::std::nullopt) { + return at::_ops::unique_consecutive_out::call(self, return_inverse, return_counts, dim, out0, out1, out2); +} +// aten::unique_consecutive.out(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple unique_consecutive_outf(const at::Tensor & self, bool return_inverse, bool return_counts, ::std::optional dim, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::unique_consecutive_out::call(self, return_inverse, return_counts, dim, out0, out1, out2); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unique_dim_consecutive_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unique_dim_consecutive_native.h new file mode 100644 index 0000000000000000000000000000000000000000..cb7f5e149f31ebc48f9017998dae9048401dc261 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unique_dim_consecutive_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple unique_dim_consecutive_out(const at::Tensor & self, int64_t dim, bool return_inverse, bool return_counts, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +TORCH_API ::std::tuple unique_dim_consecutive_cpu(const at::Tensor & self, int64_t dim, bool return_inverse=false, bool return_counts=false); +TORCH_API ::std::tuple unique_dim_consecutive_cuda(const at::Tensor & self, int64_t dim, bool return_inverse=false, bool return_counts=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unsafe_split_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unsafe_split_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f0b07adeb3299fd23122a1ae3175fe8550810a5d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/unsafe_split_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector unsafe_split(const at::Tensor & self, int64_t split_size, int64_t dim=0); +TORCH_API ::std::vector unsafe_split_symint(const at::Tensor & self, c10::SymInt split_size, int64_t dim=0); +TORCH_API void unsafe_split_out(at::TensorList out, const at::Tensor & self, int64_t split_size, int64_t dim=0); +TORCH_API void unsafe_split_outf(const at::Tensor & self, int64_t split_size, int64_t dim, at::TensorList out); +TORCH_API void unsafe_split_symint_out(at::TensorList out, const at::Tensor & self, c10::SymInt split_size, int64_t dim=0); +TORCH_API void unsafe_split_symint_outf(const at::Tensor & self, c10::SymInt split_size, int64_t dim, at::TensorList out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bicubic2d_backward_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bicubic2d_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..0333a84df7da19ecc48abe14f0376c712dbc464a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bicubic2d_backward_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_upsample_bicubic2d_backward_out_cpu : public at::meta::structured_upsample_bicubic2d_backward { +void impl(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & grad_input); +}; +struct TORCH_API structured_upsample_bicubic2d_backward_out_cuda : public at::meta::structured_upsample_bicubic2d_backward { +void impl(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & grad_input); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_backward_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..dd09103fab6e507446596c770b57a4f4ca8022bb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_backward_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor upsample_bilinear2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor upsample_bilinear2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cc556695c6c0d95f8c1fc6692c25d51db00eb6e7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & upsample_bilinear2d_out(at::Tensor & out, const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +TORCH_API at::Tensor & upsample_bilinear2d_outf(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors, at::Tensor & out); +TORCH_API at::Tensor & upsample_bilinear2d_symint_out(at::Tensor & out, const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +TORCH_API at::Tensor & upsample_bilinear2d_symint_outf(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_linear1d_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_linear1d_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e13cebb02eeebe3e9b8b96ea87230090e4e30341 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_linear1d_cuda_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor upsample_linear1d(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor upsample_linear1d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor & upsample_linear1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor & upsample_linear1d_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales, at::Tensor & out); +TORCH_API at::Tensor & upsample_linear1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor & upsample_linear1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_linear1d_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_linear1d_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c909c54eef262420323e148c2b63a5f2d1dc72ee --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_linear1d_meta_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor upsample_linear1d(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor upsample_linear1d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor & upsample_linear1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor & upsample_linear1d_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales, at::Tensor & out); +TORCH_API at::Tensor & upsample_linear1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor & upsample_linear1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest1d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest1d_backward_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3a169bc0d2ede249cfce05a2107a2c118323a86f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest1d_backward_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor upsample_nearest1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales=::std::nullopt); +TORCH_API at::Tensor upsample_nearest1d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales=::std::nullopt); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..38fc0a9d028a242fc127e6b4c734b99ef4e7e98b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_backward.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::upsample_nearest2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & upsample_nearest2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & upsample_nearest2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input); + } +} + +// aten::upsample_nearest2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & upsample_nearest2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & upsample_nearest2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input); + } +} + +// aten::upsample_nearest2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & upsample_nearest2d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & upsample_nearest2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input); + } +} + +// aten::upsample_nearest2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & upsample_nearest2d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & upsample_nearest2d_backward_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::upsample_nearest2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input); + } +} + +// aten::upsample_nearest2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor upsample_nearest2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor upsample_nearest2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w); + } +} + +// aten::upsample_nearest2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor upsample_nearest2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward::call(grad_output, output_size, input_size, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor upsample_nearest2d_backward(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::upsample_nearest2d_backward::call(grad_output, output_size, input_size, scales_h, scales_w); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..0e31a98531ddd57e726e2792ff902c9e26d3f959 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_upsample_nearest2d : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..96c8b6c16ab718f28e88c1cff8a1353f895f168d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_native.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +TORCH_API at::Tensor upsample_nearest2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors); +TORCH_API at::Tensor & upsample_nearest2d_vec_out_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors, at::Tensor & out); +struct TORCH_API structured_upsample_nearest2d_out_cpu : public at::meta::structured_upsample_nearest2d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +struct TORCH_API structured_upsample_nearest2d_out_cuda : public at::meta::structured_upsample_nearest2d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +TORCH_API at::Tensor upsample_nearest2d_quantized_cpu(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_trilinear3d_backward_meta.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_trilinear3d_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..4812b4a9b6ce70215c947356f279f3baa0555d28 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_trilinear3d_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_upsample_trilinear3d_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/var_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/var_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e7f99054990b9dff40eb5054f41c5a301724b069 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/var_compositeimplicitautograd_dispatch.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor var(const at::Tensor & self, bool unbiased); +TORCH_API at::Tensor var(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim=false); +TORCH_API at::Tensor & var_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim=false); +TORCH_API at::Tensor & var_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim, at::Tensor & out); +TORCH_API at::Tensor var(const at::Tensor & self, at::DimnameList dim, bool unbiased, bool keepdim=false); +TORCH_API at::Tensor & var_out(at::Tensor & out, const at::Tensor & self, at::DimnameList dim, bool unbiased, bool keepdim=false); +TORCH_API at::Tensor & var_outf(const at::Tensor & self, at::DimnameList dim, bool unbiased, bool keepdim, at::Tensor & out); +TORCH_API at::Tensor var(const at::Tensor & self, at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & var_out(at::Tensor & out, const at::Tensor & self, at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & var_outf(const at::Tensor & self, at::DimnameList dim, const ::std::optional & correction, bool keepdim, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/var_mean_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/var_mean_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..77a5f6d4c6556bb4e1c47d4c1b1c3a5610c35c76 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/var_mean_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple var_mean(const at::Tensor & self, bool unbiased); +TORCH_API ::std::tuple var_mean(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim=false); +TORCH_API ::std::tuple var_mean(const at::Tensor & self, at::DimnameList dim, bool unbiased, bool keepdim=false); +TORCH_API ::std::tuple var_mean(const at::Tensor & self, at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/vdot_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/vdot_native.h new file mode 100644 index 0000000000000000000000000000000000000000..77d5e6fa43bb8a0c009599b06de3c523dbfb99a8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/vdot_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & vdot_out(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor vdot(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor vdot_cuda(const at::Tensor & self, const at::Tensor & other); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_complex_copy_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_complex_copy_native.h new file mode 100644 index 0000000000000000000000000000000000000000..643cb7388a3e7d644ada67c5e045b8fb53317e1d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_complex_copy_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & view_as_complex_copy_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor view_as_complex_copy(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_complex_copy_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_complex_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d9abbee7ce30a60d6ca25a9b6914c8246a7cde19 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_complex_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API view_as_complex_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::view_as_complex_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "view_as_complex_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API view_as_complex_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::view_as_complex_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "view_as_complex_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_compositeimplicitautograd_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e1a51f15bd65c99588600e68e5ce5fc25afacbe9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor view_as(const at::Tensor & self, const at::Tensor & other); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_real_copy_compositeexplicitautogradnonfunctional_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_real_copy_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..dfd4aa3c7a8676ab9a13aa42d753b28072af240c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_real_copy_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor view_as_real_copy(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_real_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_real_native.h new file mode 100644 index 0000000000000000000000000000000000000000..23288e332c24570555ba118ecde4f2946158e5fe --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_real_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor view_as_real(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/vsplit_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/vsplit_native.h new file mode 100644 index 0000000000000000000000000000000000000000..721292f36174bf5877e42ec48559c1acbbc9df8e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/vsplit_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector vsplit(const at::Tensor & self, int64_t sections); +TORCH_API ::std::vector vsplit(const at::Tensor & self, at::IntArrayRef indices); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/xlogy_cpu_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/xlogy_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..09707c8e2fd23ae1ee8b42fac029851ea237a3af --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/xlogy_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor xlogy(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & xlogy_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & xlogy_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & xlogy_(at::Tensor & self, const at::Tensor & other); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zero_cuda_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zero_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2fb750d56f9e4b157ba3769324e02d3bfc2c5dc0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zero_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor & zero_(at::Tensor & self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zero_meta_dispatch.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zero_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d973fe0c229d1748d9f9b83e281b897a7539318c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zero_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & zero_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zeros_like.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zeros_like.h new file mode 100644 index 0000000000000000000000000000000000000000..4329bee7d593f710bb4bce1a8e2858387ab682e0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zeros_like.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor +inline at::Tensor zeros_like(const at::Tensor & self, at::TensorOptions options={}, ::std::optional memory_format=::std::nullopt) { + return at::_ops::zeros_like::call(self, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); +} +// aten::zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor +inline at::Tensor zeros_like(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format) { + return at::_ops::zeros_like::call(self, dtype, layout, device, pin_memory, memory_format); +} + +// aten::zeros_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & zeros_like_out(at::Tensor & out, const at::Tensor & self, ::std::optional memory_format=::std::nullopt) { + return at::_ops::zeros_like_out::call(self, memory_format, out); +} +// aten::zeros_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & zeros_like_outf(const at::Tensor & self, ::std::optional memory_format, at::Tensor & out) { + return at::_ops::zeros_like_out::call(self, memory_format, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zeros_like_native.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zeros_like_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2636829546dc5b3b208087a9113ad8f55d461035 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/ops/zeros_like_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor zeros_like(const at::Tensor & self, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & zeros_like_out(const at::Tensor & self, ::std::optional memory_format, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/quantized/QTensorImpl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/quantized/QTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..036c73e6760f565f0d58dbe1b76f9e339e4a5a64 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/quantized/QTensorImpl.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace at { + +/** + * QTensorImpl is a TensorImpl for Quantized Tensors, it stores Quantizer which + * specifies the quantization scheme and parameters, for more information please + * see ATen/quantized/Quantizer.h + * + * We'll use QTensor in code or documentation to refer to a Tensor with QTensorImpl. + */ +struct TORCH_API QTensorImpl : public c10::TensorImpl { + public: + QTensorImpl( + Storage&& storage, + DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + QuantizerPtr quantizer); + + // See Note [Enum ImplType] + QTensorImpl( + ImplType type, + Storage&& storage, + DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + QuantizerPtr quantizer); + + + // TODO: Expose in PyTorch Frontend + QuantizerPtr quantizer() { + return quantizer_; + } + + void set_quantizer_(QuantizerPtr quantizer) { + quantizer_ = quantizer; + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override { + auto impl = c10::make_intrusive( + Storage(storage()), key_set(), data_type_, quantizer_); + copy_tensor_metadata( + /*src_q_impl=*/this, + /*dest_q_impl=*/impl.get(), + /*version_counter=*/version_counter, + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); + impl->refresh_numel(); + impl->refresh_contiguous(); + return impl; + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + c10::intrusive_ptr shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override { + auto impl = c10::make_intrusive( + Storage(storage()), key_set(), data_type_, quantizer_); + copy_tensor_metadata( + /*src_q_impl=*/this, + /*dest_q_impl=*/impl.get(), + /*version_counter=*/std::move(version_counter), + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); + impl->refresh_numel(); + impl->refresh_contiguous(); + return impl; + } + + /** + * Shallow-copies data from another TensorImpl into this TensorImpl. + * + * For why this function doesn't check this TensorImpl's `allow_tensor_metadata_change_`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + void shallow_copy_from(const c10::intrusive_ptr& impl) override { + AT_ASSERT(has_compatible_shallow_copy_type(impl->key_set())); + auto q_impl = static_cast(impl.get()); + copy_tensor_metadata( + /*src_q_impl=*/q_impl, + /*dest_q_impl=*/this, + /*version_counter=*/version_counter(), + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change()); + refresh_numel(); + refresh_contiguous(); + } + + private: + QuantizerPtr quantizer_; + + const char* tensorimpl_type_name() const override; + + /** + * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / storage_offset) + * from one TensorImpl to another TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE [ TensorImpl Shallow-Copying ]. + */ + static void copy_tensor_metadata( + const QTensorImpl* src_q_impl, + QTensorImpl* dest_q_impl, + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) { + TensorImpl::copy_tensor_metadata(src_q_impl, dest_q_impl, version_counter, allow_tensor_metadata_change); + + // OpaqueTensorImpl-specific fields. + dest_q_impl->quantizer_ = src_q_impl->quantizer_; + } +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/quantized/Quantizer.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/quantized/Quantizer.h new file mode 100644 index 0000000000000000000000000000000000000000..787f69064348d095ec856205b15a69172194c44b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/ATen/quantized/Quantizer.h @@ -0,0 +1,284 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + +namespace at { + +/** + * UnknownQuantizer is a placeholder quantizer for functions that implement + * quantization in a two step process. First a tensor is allocated but with + * unknown quantizer, and then the quantization kernel decides what the final + * quantizer will be. + */ +struct TORCH_API UnknownQuantizer : public Quantizer { + explicit UnknownQuantizer(ScalarType scalar_type) + : Quantizer(scalar_type) {} + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + QScheme qscheme() const override; + bool equalTo(QuantizerPtr other) const override; +}; + +/** + * UniformQuantizer is the parent class for all uniform quantizers. + * These quantization scheme will map float value uniformly to + * the quantized value. For example, affine quantizer is + * the most commonly used scheme in this category. + */ +struct TORCH_API UniformQuantizer : public Quantizer { + explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {} +}; + +/** + * NonUniformQuantizer is the parent class for all non-uniform quantizers. + * These quantization scheme may map float value non-uniformly to the quantized + * value. K-means quantization is a representative example in this category. + */ +struct TORCH_API NonUniformQuantizer : public Quantizer { + explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {} +}; + +// There is also StochasticQuantizer which is uniform but not affine + +/** + * AffineQuantizer uses affine transformation to do quantization. + * + * For quantize: + * Y = clamp(round(X / scale + zero_point), min, max) + * For dequantize: + * X = (Y - zero_point) * scale + */ +struct TORCH_API AffineQuantizer : public UniformQuantizer { + explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {} +}; + +// Note that we will not have Symmetric Quantizer in backend to reduce +// complications in quantized kernel implementation. + +/** + * PerTensorAffineQuantizer stores a scale and a zero_point, which is used for + * all the values in the Tensor. + */ +struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer { + explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point) + : AffineQuantizer(scalar_type), + scale_(scale), + zero_point_(zero_point) {} + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + QScheme qscheme() const override { + return kPerTensorAffine; + } + + double scale() const { + return scale_; + } + + int64_t zero_point() const { + return zero_point_; + } + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerTensorAffine) { + return false; + } + auto* other_per_tensor_affine = + static_cast(other.get()); + return scalar_type() == other_per_tensor_affine->scalar_type() && + scale() == other_per_tensor_affine->scale() && + zero_point() == other_per_tensor_affine->zero_point(); + } + + private: + const double scale_; + // We use int64_t for consistency with Python + const int64_t zero_point_; +}; + +/** + * PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer + * except that we have an independent scale and zero_point parameter + * for each channel. + * + * Also note that per channel quantization is mostly applied to output channels + * of weights since per-input channel of weight quantization or per-channel + * quantization for activations can't be efficiently supported in most of + * processors since it requires each multiplication result within a single + * dot-product to have a different scale. + */ +struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer { + explicit PerChannelAffineQuantizer( + ScalarType scalar_type, + Tensor scales, + Tensor zero_points, + int64_t axis) + : AffineQuantizer(scalar_type), + scales_(std::move(scales)), + zero_points_(std::move(zero_points)), + axis_(axis) {} + + QScheme qscheme() const override { + return kPerChannelAffine; + } + + Tensor scales() const { + return scales_; + } + + Tensor zero_points() const { + return zero_points_; + } + + int64_t axis() const { + return axis_; + } + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerChannelAffine) { + return false; + } + auto* other_per_channel_affine = + static_cast(other.get()); + return scalar_type() == other_per_channel_affine->scalar_type() && + scales().equal(other_per_channel_affine->scales()) && + zero_points().equal(other_per_channel_affine->zero_points()) && + axis() == other_per_channel_affine->axis(); + } + + protected: + Tensor scales_; + Tensor zero_points_; + const int64_t axis_; +}; + +/** + * PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer + * except that it expects both scale and zero point to be floating point values. + * + * This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of + * kPerChannelAffine. + * + * The quantize equation in this case looks like - + * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale + * + * Note: Usage of floating point zero point is useful in cases where 0 doesn't need to + * be exactly represented in the quantized space. We can get additional precision by + * using floating point values for zero point. + */ +struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer { + explicit PerChannelAffineFloatQParamsQuantizer( + ScalarType scalar_type, + Tensor scales, + Tensor zero_points, + int64_t axis) + : PerChannelAffineQuantizer(scalar_type, + scales, + zero_points, + axis) {} + + QScheme qscheme() const override { + return kPerChannelAffineFloatQParams; + } + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) { + return false; + } + auto* other_per_channel_float_qparams = + static_cast(other.get()); + return scalar_type() == other_per_channel_float_qparams->scalar_type() && + scales().equal(other_per_channel_float_qparams->scales()) && + zero_points().equal(other_per_channel_float_qparams->zero_points()) && + axis() == other_per_channel_float_qparams->axis(); + } +}; + +// This is an internal utility function for getting at the QTensorImpl, +// You should only use this for writing low level +// setters/getters for QTensorImpl fields; otherwise, you should use +// the low level setters/getters that were implemented using this. +// This may be called repeatedly, so make sure it's pretty cheap. +TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self); + +// double and int64_t are because of the native function API, we only have these +// argument types right now in native functions +TORCH_API QuantizerPtr +make_per_tensor_affine_quantizer( + double scale, int64_t zero_point, ScalarType scalar_type); + +TORCH_API QuantizerPtr make_per_channel_affine_quantizer( + const Tensor& scales, + const Tensor& zero_points, + int64_t axis, + ScalarType scalar_type); + +TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type); + +// Create a Quantized Tensor given arguments for normal Tensor and a quantizer +TORCH_API Tensor new_qtensor( + IntArrayRef sizes, + const TensorOptions& options, + QuantizerPtr quantizer); + +TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer); + +TORCH_API Tensor from_blob_quantized_per_tensor_affine( + void* data, + IntArrayRef sizes, + IntArrayRef strides, + std::function deleter, + const float scale, + const int64_t zeroPoint, + const TensorOptions& options); + +TORCH_API Tensor from_blob_quantized_per_tensor_affine( + void* data, + IntArrayRef sizes, + std::function deleter, + const float scale, + const int64_t zeroPoint, + const TensorOptions& options); + +TORCH_API Tensor from_blob_quantized_per_channel_affine( + void* data, + IntArrayRef sizes, + std::function deleter, + const Tensor& scales, + const Tensor& zero_points, + const int64_t axis, + const TensorOptions& options); + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h new file mode 100644 index 0000000000000000000000000000000000000000..711889ae4f43e9ecceb6637206fe67acfcf063d0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h @@ -0,0 +1,259 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace torch::data { +template +class DataLoaderBase { + public: + using BatchType = Batch; + using BatchRequestType = BatchRequest; + + /// Constructs a new DataLoader from a `dataset` to sample from, `options` + /// to configure the DataLoader with, and a `sampler` that specifies the + /// sampling strategy. + DataLoaderBase( + DataLoaderOptions options, + std::unique_ptr main_thread_dataset = nullptr) + : options_(options), + main_thread_dataset_(std::move(main_thread_dataset)), + sequencer_(new_sequencer()) {} + + DataLoaderBase(const DataLoaderBase&) = delete; + DataLoaderBase(DataLoaderBase&&) = delete; + DataLoaderBase& operator=(const DataLoaderBase&) = delete; + DataLoaderBase& operator=(DataLoaderBase&&) = delete; + // NOLINTNEXTLINE(bugprone-exception-escape) + virtual ~DataLoaderBase() { + join(); + } + + /// Returns an iterator into the DataLoader. The lifetime of the iterator is + /// bound to the DataLoader. In C++ standards language, the category of the + /// iterator is `OutputIterator`. See + /// https://en.cppreference.com/w/cpp/named_req/OutputIterator for what this + /// means. In short: you may increment the iterator and dereference it, but + /// cannot go back, or step forward more than one position at a time. When the + /// DataLoader is exhausted, it will compare equal with the special + /// "sentinel" iterator returned by `DataLoader::end()`. Most of the time, you + /// should only use range-for loops to loop over the DataLoader, but + /// standard algorithms like `std::copy(dataloader.begin(), dataloader.end(), + /// output_iterator)` are supported too. + Iterator begin() { + TORCH_CHECK( + shuttle_.in_flight_jobs() == 0, + "Attempted to get a new DataLoader iterator " + "while another iterator is not yet exhausted"); + reset(); + return Iterator(std::make_unique>( + [this] { return this->next(); })); + } + + /// Returns a special "sentinel" iterator that compares equal with a + /// non-sentinel iterator once the DataLoader is exhausted. + Iterator end() { + return Iterator(std::make_unique>()); + } + + /// Joins the DataLoader's worker threads and drains internal queues. + /// This function may only be invoked from the main thread (in which the + /// DataLoader lives). + void join() { + if (joined_) { + return; + } + shuttle_.drain(); + // Send one 'quit' message per worker. Since a worker dies (exits its + // thread) after receiving this message, each `QuitWorker()` message will be + // read by exactly one worker. + for ([[maybe_unused]] const auto w : c10::irange(options_.workers)) { + push_job(QuitWorker()); + } + for (auto& worker : workers_) { + worker.join(); + } + joined_ = true; + } + + /// Returns the options with which the DataLoader was configured. + const FullDataLoaderOptions& options() const noexcept { + return options_; + } + + protected: + /// Simple mix-in to give something a sequence number. + struct Sequenced { + Sequenced() = default; + Sequenced(size_t sqn) : sequence_number(sqn) {} + size_t sequence_number; + }; + + struct QuitWorker {}; + + /// A `Job` is either a `BatchRequest` (new indices to fetch data at) or a + /// `QuitWorker` object, to indicate the worker should shut down. + struct Job : Sequenced { + Job() = default; + Job(QuitWorker q, size_t sqn) : Sequenced(sqn), quit(q) {} + Job(BatchRequest&& i, size_t sqn) + : Sequenced(sqn), batch_request(std::move(i)) {} + std::optional quit; + std::optional batch_request; + }; + + /// The finished result of a job. + struct Result : Sequenced { + Result() = default; + Result(std::optional&& b, size_t sqn) + : Sequenced(sqn), batch(std::move(b)) {} + Result(std::exception_ptr exception, size_t sqn) + : Sequenced(sqn), exception(std::move(exception)) {} + std::optional batch; + std::exception_ptr exception; + }; + + /// Subclass hook for getting the next batch request. The stateless case will + /// ask the sampler for a new batch request (e.g. a vector of indices), while + /// the stateful one will simply return the batch size. + virtual std::optional get_batch_request() = 0; + + /// Resets the internal state of the DataLoader, optionally pre-fetching + /// new jobs. + virtual void reset() { + shuttle_.drain(); + sequence_number_ = 0; + sequencer_ = new_sequencer(); + prefetch(); + } + + /// Schedules `requested_jobs` many new batches to be fetched. The actual + /// number of jobs scheduled may be less if the DataLoader exhausts. + void prefetch(size_t requested_jobs) { + for ([[maybe_unused]] const auto r : c10::irange(requested_jobs)) { + if (auto batch_request = get_batch_request()) { + this->push_job(std::move(*batch_request)); + } else { + break; + } + } + } + + /// Schedules the maximum number of jobs (based on the `max_jobs` option). + void prefetch() { + prefetch(options_.max_jobs); + } + + /// Returns the next batch of data, or an empty `optional` if the DataLoader + /// is exhausted. This operation will block until a batch is available if one + /// is still expected. + std::optional next() { + if (options_.workers > 0) { + while (std::optional result = this->pop_result()) { + if (result->exception) { + throw WorkerException(result->exception); + } else if (result->batch) { + prefetch(1); + return std::move(result->batch); + } + } + } else if (auto batch_request = get_batch_request()) { + return this->main_thread_dataset_->get_batch(std::move(*batch_request)); + } + return std::nullopt; + } + + /// The function that worker threads run. + void worker_thread(Dataset& dataset) { + while (true) { + auto job = shuttle_.pop_job(); + if (job.quit) { + break; + } + try { + auto batch = dataset.get_batch(std::move(*job.batch_request)); + shuttle_.push_result({std::move(batch), job.sequence_number}); + } catch (...) { + shuttle_.push_result({std::current_exception(), job.sequence_number}); + } + } + } + + /// Convenience method that calls `shuttle_.push_job()` with the next sequence + /// number. + template + void push_job(T value) { + shuttle_.push_job({std::move(value), sequence_number_++}); + } + + /// Convenience method that gets the next result from the sequencer. + std::optional pop_result() { + return sequencer_->next( + [this] { return this->shuttle_.pop_result(this->options_.timeout); }); + } + + /// Convenience method that creates a new sequencer based on the + /// `enforce_ordering` option. + std::unique_ptr> new_sequencer() { + if (options_.enforce_ordering) { + return std::make_unique>( + options_.max_jobs); + } + return std::make_unique>(); + } + + /// The options the DataLoader was configured with. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const FullDataLoaderOptions options_; + + /// The dataset for the main thread, only has a value if the number of + /// worker threads was configured as zero, meaning the main thread has to do + /// all the work (synchronously). NOTE: Really want this to be on the heap + /// when empty, therefore `unique_ptr` and not `optional`. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr main_thread_dataset_; + + /// The sequence number for the *next* batch to be retrieved from the + /// dataset. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t sequence_number_ = 0; + + /// The worker threads, running the `worker_thread()` method. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector workers_; + + /// The `DataShuttle` which takes care of the life cycle of a job. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + detail::DataShuttle shuttle_; + + /// The `Sequencer`, which handles optional ordering of batches. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr> sequencer_; + + /// True if the DataLoader has joined its worker threads. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool joined_ = false; +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h new file mode 100644 index 0000000000000000000000000000000000000000..9e22c3f23d12c74d0bf272f89dcc5360c6dbc131 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::data { + +/// A dataloader for stateful datasets. +/// +/// A dataloader for stateful datatasets differs from one for stateless +/// datasets one in that the dataset is shared among worker threads, and that +/// this dataset is itself responsible for producing batches rather than +/// depending on a sampler. The statefulness here actually refers to the +/// dataset. The StatefulDataLoader simply alters the data loading algorithm to +/// accommodate the stateful, shared nature of the dataset. Note that the +/// dataset must be thread safe if more than one worker thread is used. +/// +/// A stateful dataloader is created by calling `make_data_loader` with a +/// stateful dataset. +template +class StatefulDataLoader : public DataLoaderBase< + Dataset, + typename Dataset::BatchType::value_type, + typename Dataset::BatchRequestType> { + public: + using super = DataLoaderBase< + Dataset, + typename Dataset::BatchType::value_type, + typename Dataset::BatchRequestType>; + using typename super::BatchRequestType; + + /// Constructs the `StatefulDataLoader` from a `dataset` and some `options`. + StatefulDataLoader(Dataset dataset, DataLoaderOptions options) + : super(options, std::make_unique(std::move(dataset))) { + for ([[maybe_unused]] const auto _ : c10::irange(this->options_.workers)) { + // As opposed to the stateless case, here all worker threads access the + // same underlying dataset. + this->workers_.emplace_back( + [this] { this->worker_thread(*this->main_thread_dataset_); }); + } + } + + private: + /// Resets the internal state of the dataloader and the dataset. + void reset() override { + this->main_thread_dataset_->reset(); + // Call the base class method last because it calls `prefetch()` + super::reset(); + } + + /// For stateful datasets, the batch request is always the batch size. The + /// dataset is responsible for determining what goes into the batch next. + std::optional get_batch_request() override { + return this->options_.batch_size; + } +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateless.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateless.h new file mode 100644 index 0000000000000000000000000000000000000000..f439bd2e151b4cf2b5557a162276c37b13110dc4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateless.h @@ -0,0 +1,85 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +namespace torch::data { + +/// A dataloader for stateless datasets. +/// +/// This dataloader follows the traditional PyTorch dataloader design, whereby a +/// (possibly) stateful sampler produces *batch requests* for a stateless +/// dataset, which acts as a simple batch request to batch mapping. The batch +/// request will often be an array of indices, and if the dataset is a simple +/// image dataset, the dataset would produce the images at those indices. +template +class StatelessDataLoader : public DataLoaderBase< + Dataset, + typename Dataset::BatchType, + typename Sampler::BatchRequestType> { + public: + using super = DataLoaderBase< + Dataset, + typename Dataset::BatchType, + typename Sampler::BatchRequestType>; + using typename super::BatchRequestType; + + /// Constructs the `StatelessDataLoader` from a `dataset`, a `sampler` and + /// some `options`. + StatelessDataLoader( + Dataset dataset, + Sampler sampler, + DataLoaderOptions options) + : super(options), sampler_(std::move(sampler)) { + for (const auto w : c10::irange(this->options_.workers)) { + // Here we copy the dataset into the worker thread closure. Each worker + // has its own copy of the dataset. This means the dataset must be + // trivially copiable, or else we don't expect more than one worker to + // be in use. + (void)w; // Suppress unused variable warning + this->workers_.emplace_back( + [this, dataset]() mutable { this->worker_thread(dataset); }); + } + if (this->options_.workers == 0) { + this->main_thread_dataset_ = + std::make_unique(std::move(dataset)); + } + } + + private: + /// Resets the internal state of the dataloader and the sampler. + void reset() override { + sampler_.reset(); + // Call the base class method last because it calls `prefetch()` + super::reset(); + } + + /// Queries the sampler for the next batch request (possibly progressing its + /// internal state). + std::optional get_batch_request() override { + auto indices = sampler_.next(this->options_.batch_size); + if (!indices || + (indices->size() < this->options_.batch_size && + this->options_.drop_last)) { + return std::nullopt; + } + AT_ASSERT(indices->size() > 0); + return indices; + } + + /// The `Sampler` used to produce batch requests. + Sampler sampler_; +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/base.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/base.h new file mode 100644 index 0000000000000000000000000000000000000000..fd8fb7471710bd1f96bed17cd8bf722272879639 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/base.h @@ -0,0 +1,101 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace torch::data::datasets { +template +class MapDataset; +template +MapDataset map(D, T); // NOLINT +} // namespace torch::data::datasets + +namespace torch::data::datasets { +namespace detail { +template +struct is_optional : std::false_type {}; +template +struct is_optional> : std::true_type {}; +} // namespace detail + +/// A dataset that can yield data only in batches. +template < + typename Self, + typename Batch = std::vector>, + typename BatchRequest = ArrayRef> +class BatchDataset { + public: + using SelfType = Self; + using BatchType = Batch; + using BatchRequestType = BatchRequest; + constexpr static bool is_stateful = detail::is_optional::value; + + virtual ~BatchDataset() = default; + + /// Returns a batch of data given an index. + virtual Batch get_batch(BatchRequest request) = 0; + + /// Returns the size of the dataset, or an empty std::optional if it is + /// unsized. + virtual std::optional size() const = 0; + + /// Creates a `MapDataset` that applies the given `transform` to this dataset. + template + MapDataset map(TransformType transform) & { + return datasets::map(static_cast(*this), std::move(transform)); + } + + /// Creates a `MapDataset` that applies the given `transform` to this dataset. + template + MapDataset map(TransformType transform) && { + return datasets::map( + std::move(static_cast(*this)), std::move(transform)); + } +}; + +/// A dataset that can yield data in batches, or as individual examples. +/// +/// A `Dataset` is a `BatchDataset`, because it supports random access and +/// therefore batched access is implemented (by default) by calling the random +/// access indexing function for each index in the requested batch of indices. +/// This can be customized. +template > +class Dataset : public BatchDataset> { + public: + using ExampleType = SingleExample; + + /// Returns the example at the given index. + virtual ExampleType get(size_t index) = 0; + + /// Returns a batch of data. + /// The default implementation calls `get()` for every requested index + /// in the batch. + std::vector get_batch(ArrayRef indices) override { + std::vector batch; + batch.reserve(indices.size()); + for (const auto i : indices) { + batch.push_back(get(i)); + } + return batch; + } +}; + +/// A `StreamDataset` represents a dataset that is a potentially infinite +/// stream. It takes as batch index only a number, which is the batch size, and +/// yields that many elements from the stream. +template >> +using StreamDataset = BatchDataset; +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/chunk.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/chunk.h new file mode 100644 index 0000000000000000000000000000000000000000..78d57e7d88d0d9d8235f3d985b6e37fa26afbe0b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/chunk.h @@ -0,0 +1,532 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::data::datasets { + +/// Interface for chunk reader, which performs data chunking and reading of +/// entire chunks. +/// +/// A chunk could be an entire file, such as an audio data file or an image, +/// or part of a file in the case of a large text-file split based on seek +/// positions. +template < + typename ExampleType_, + typename ChunkType_ = std::vector> +class ChunkDataReader { + public: + virtual ~ChunkDataReader() = default; + + using ChunkType = ChunkType_; + using ExampleType = ExampleType_; + + /// Read an entire chunk. + virtual ChunkType read_chunk(size_t chunk_index) = 0; + + /// Returns the number of chunks available in this reader. + virtual size_t chunk_count() = 0; + + /// This will clear any internal state associate with this reader. + virtual void reset() = 0; +}; + +namespace detail { +/// BatchDataBuffer manages a queue of UnwrappedBatchData. After a new chunk is +/// loaded, BatchDataBuffer splits it into small batches and push them into the +/// queue. When get_batch is called from data loader, it pops cached batches and +/// return. If the cache is empty, it either waits to load more chunks or return +/// null if all chunks are loaded. +template < + typename UnwrappedBatch, + typename ExampleSampler = samplers::RandomSampler> +class BatchDataBuffer { + public: + using UnwrappedBatchType = UnwrappedBatch; + using BatchType = std::optional; + using BatchRequestType = typename ExampleSampler::BatchRequestType; + + BatchDataBuffer( + size_t batch_size, + ExampleSampler& example_sampler, + size_t queue_capacity) + : batch_size_(batch_size), + example_sampler_(example_sampler), + queue_capacity_(queue_capacity) {} + + /// Return batch data from the queue. Called from the ChunkDataset main + /// thread. + BatchType get_batch() { + std::unique_lock lock(queue_mutex_); + cv_read_.wait(lock, [this] { + // wait till there is available data in the queue or if all chunks are + // loaded (i.e. the dataset is exhausted for this epoch) + return ( + this->total_example_count_in_queue_ >= batch_size_ || this->stop_); + }); + if (batch_queue_.empty()) { + AT_ASSERT(stop_); + // All batches have been retrieved. Return an empty batch. + return std::nullopt; + } + + UnwrappedBatchData batch = std::move(batch_queue_.front()); + batch_queue_.pop(); + if (batch.exception) { + throw WorkerException(batch.exception); + } + + total_example_count_in_queue_ -= batch.batch_data.size(); + lock.unlock(); + cv_write_.notify_all(); + + return batch.batch_data; + } + + /// Push preloaded chunks to batch queue. Called from the ChunkDataset worker + /// threads. + void add_chunk_data(UnwrappedBatchType data) { + std::unique_lock lock(queue_mutex_); + cv_write_.wait(lock, [this] { + // stop loading if we have preloaded enough data. + return this->total_example_count_in_queue_ < this->queue_capacity_ || + this->stop_; + }); + if (stop_) { + // When stop_ is true, it means no further chunk loading is necessary. + // Return without any further processing. + return; + } + + auto data_size = data.size(); + auto remaining_size = data_size; + example_sampler_.reset(data_size); + + auto fill_batch = [&](size_t example_count, UnwrappedBatchType& batch) { + auto batch_example_indices = this->example_sampler_.next(example_count); + AT_ASSERT( + batch_example_indices && + batch_example_indices.value().size() == example_count); + BatchRequestType& indices = batch_example_indices.value(); + for (size_t i : indices) { + TORCH_CHECK(i < data_size, "Index out of range"); + batch.emplace_back(std::move(data[i])); + } + remaining_size -= example_count; + }; + + if (!batch_queue_.empty()) { + // if the queue has existing data, and the last batch doesn't have enough + // examples to fill a batch_size batch, add more example to this batch + // first. + auto& batch = batch_queue_.back(); + size_t current_count = batch.batch_data.size(); + if (current_count < batch_size_) { + auto example_count = + std::min(remaining_size, batch_size_ - current_count); + fill_batch(example_count, batch.batch_data); + } + } + + // If we still have data remaining after filling the last pushed batch, add + // them to the queue too. + while (remaining_size > 0) { + UnwrappedBatchType current_batch; + + // Allocate the batch memory ahead of time. + current_batch.reserve(batch_size_); + + auto example_count = std::min(remaining_size, batch_size_); + fill_batch(example_count, current_batch); + batch_queue_.emplace(std::move(current_batch)); + } + total_example_count_in_queue_ += data_size; + lock.unlock(); + cv_read_.notify_all(); + } + + /// Push exceptions thrown during preloading into batch queue. Called from + /// the ChunkDataset worker threads. + void add_chunk_data(std::exception_ptr e_ptr) { + std::unique_lock lock(queue_mutex_); + cv_write_.wait(lock, [this] { + // stop loading if we have preloaded enough data. + return ( + this->total_example_count_in_queue_ < this->queue_capacity_ || + this->stop_); + }); + if (stop_) { + // When stop_ is true, it means this current thread needs to be tore down, + // the batch buffer will be discarded, so no need to enqueue any new + // exceptions. + return; + } + + batch_queue_.emplace(e_ptr); + lock.unlock(); + cv_read_.notify_all(); + } + + void stop() { + { + // Hold the lock before changing stop_ to prevent a race condition which + // can cause a deadlock. To be more specific, conditional variable + // cv_write_ waits on predicate stop_ in add_chunk_data(). The wait + // happens in two steps: 1) while still holding the lock, check if + // predicate is true; 2) if it is true, proceeds, otherwise, release the + // lock and wait until notified. Without holding a lock, cv_write_'s + // notification can happen in between step 1) and 2). In that case, as + // cv_write_ is not in waiting status yet, so the notification is lost and + // cv_write_ will sleep forever. By taking a lock before changing + // predicate stop_, it is ensured updating and evaluating stop_ always + // happen in a synchronized way + std::lock_guard lock(queue_mutex_); + stop_ = true; + } + + // notify all writers, wake them from wait to exit current method. + cv_write_.notify_all(); + // notify all readers too. + cv_read_.notify_all(); + } + /// The batch size is needed to create batches from the chunk data. Similar to + /// regular dataloader where the batches are created with prefetches, + /// BatchDataBuffer perform the batch creation using the provided batch size. + size_t batch_size_ = 0; + + /// count of total example stored in the queue + size_t total_example_count_in_queue_ = 0; + + /// struct that contains a raw unwrapped batch unit. An unwrapped batch unit + /// is the raw data without 'optional' wrapper. It can be a collection of + /// images, utterances, e.t.c. + struct UnwrappedBatchData { + explicit UnwrappedBatchData(UnwrappedBatchType data) + : batch_data(std::move(data)) {} + + explicit UnwrappedBatchData(std::exception_ptr e) + : exception(std::move(e)) {} + + /// batch data to return + UnwrappedBatchType batch_data; + + /// exception pointer which captures any abnormal exceptions while creating + /// the batch. + std::exception_ptr exception; + }; + + /// local cache to store example batches from loaded chunk + std::queue batch_queue_; + + // sync batch_queue_ update. + std::mutex queue_mutex_; + + std::condition_variable cv_read_; + std::condition_variable cv_write_; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + ExampleSampler& example_sampler_; + + // configurable maximum number of elements the queue can hold at one time. + size_t queue_capacity_; + + // When set to true, it wakes the writer threads from the wait and exit + // current function call. This is needed when ChunkDataSet.Reset is called + // while the previous epoch is not exhausted yet. When ChunkDataset is waiting + // its preloader to finish previous work before tearing down the thread, the + // preloader could be still waiting for the conditional variable, thus cause + // the program to hang. This boolean is used to break this waiting condition. + bool stop_ = false; +}; +} // namespace detail + +/// Options to configure a `ChunkDataset`. +struct ChunkDatasetOptions { + ChunkDatasetOptions() = delete; + ChunkDatasetOptions( + size_t preloader_count, + size_t batch_size, + size_t cache_size = 2048, + size_t cross_chunk_shuffle_count = 1) + : preloader_count_(preloader_count), + batch_size_(batch_size), + cache_size_(cache_size), + cross_chunk_shuffle_count_(cross_chunk_shuffle_count) { + TORCH_CHECK( + preloader_count_ > 0, + "Preloader count is 0. At least one preloader needs to be specified."); + TORCH_CHECK( + batch_size_ > 0, + "Batch size is 0. A positive batch size needs to be specified."); + TORCH_CHECK( + cache_size_ > 0, + "Cache size is 0. A positive cache size needs to be specified."); + TORCH_CHECK( + cache_size_ >= batch_size_, + "Cache size is less than batch size. Cache needs to be large enough to " + "hold at least one batch."); + TORCH_CHECK( + cross_chunk_shuffle_count_ > 0, + "cross_chunk_shuffle_count needs to be greater than 0."); + } + + /// The number of worker thread to preload chunk data. + TORCH_ARG(size_t, preloader_count); + + /// The size of each batch. + TORCH_ARG(size_t, batch_size); + + /// The capacity of the queue for batch caching. + TORCH_ARG(size_t, cache_size) = 2048; + + // The number of chunks to perform cross-chunk shuffling. Default to 1 meaning + // no cross-chunk shuffling. When it is equal to n (n > 1), n random + // chunks will be loaded at once and example shuffling will be performed + // across all those n chunks. + // Note: Usually the default config (1 chunk shuffle + example shuffle) is + // good enough to generate random distributed data. Use this parameter only if + // you know cross-shuffle is needed in your case. Also there is a performance + // penalty when this value is greater than 1, as we need to do extra merge + // between multiple chunks before performing example sampling. + TORCH_ARG(size_t, cross_chunk_shuffle_count) = 1; +}; + +/// A stateful dataset that support hierarchical sampling and prefetching of +/// entre chunks. +/// +/// Unlike regular dataset, chunk dataset require two samplers to operate and +/// keeps an internal state. `ChunkSampler` selects, which chunk to load next, +/// while the `ExampleSampler` determines the order of Examples that are +/// returned in each `get_batch` call. The hierarchical sampling approach used +/// here is inspired by this paper +/// http://martin.zinkevich.org/publications/nips2010.pdf +template < + typename ChunkReader, + typename ChunkSampler = samplers::RandomSampler, + typename ExampleSampler = samplers::RandomSampler> +class ChunkDataset final + : public StatefulDataset< + ChunkDataset, + typename ChunkReader::BatchType, + size_t> { + public: + using BatchType = std::optional; + using UnwrappedBatchType = typename ChunkReader::BatchType; + using BatchRequestType = size_t; + using ChunkSamplerType = ChunkSampler; + using ExampleSamplerType = ExampleSampler; + + ChunkDataset( + ChunkReader chunk_reader, + ChunkSampler chunk_sampler, + ExampleSampler example_sampler, + ChunkDatasetOptions options, + std::function preprocessing_policy = + std::function()) + : chunk_reader_(std::move(chunk_reader)), + chunk_sampler_(std::move(chunk_sampler)), + example_sampler_(std::move(example_sampler)), + options_(options), + preprocessing_policy_(std::move(preprocessing_policy)), + quit_worker_(false), + running_preloaders_(0) {} + + ~ChunkDataset() override { + // stop batch buffer first. + if (batch_buffer_) { + batch_buffer_->stop(); + } + free_workers(); + } + + /// Default get_batch method of BatchDataset. This method returns + /// Example batches created from the preloaded chunks. The implementation + /// is dataset agnostic and does not need overriding in different chunk + /// datasets. + BatchType get_batch(size_t batch_size) override { + TORCH_CHECK( + batch_buffer_ != nullptr, + "Dataset needs to call reset() before calling get_batch()."); + + TORCH_CHECK( + batch_size == options_.batch_size(), + "The requested batch size does not match with the initialized batch size.\n" + " The requested batch size is ", + batch_size, + ", while the dataset is created with batch size equal to ", + options_.batch_size()); + return batch_buffer_->get_batch(); + } + + /// Helper method around get_batch as `batch_size` is not strictly necessary + BatchType get_batch() { + return get_batch(options_.batch_size()); + } + + /// This will clear any internal state and starts the internal prefetching + /// mechanism for the chunk dataset. + void reset() override { + // We need this to support partial data reads via dataloader iterator. + if (batch_buffer_) { + batch_buffer_->stop(); + } + // free workers from previous reset if there is any. + free_workers(); + preload_threads_.clear(); + + if (!load_checkpoint_) { + chunk_reader_.reset(); + chunk_sampler_.reset(chunk_reader_.chunk_count()); + load_checkpoint_ = false; + } + + // Throw out any existing cached batch in the buffer and re-creates a new + // chunk buffer. + batch_buffer_ = std::make_unique< + detail::BatchDataBuffer>( + options_.batch_size(), example_sampler_, options_.cache_size()); + + // create new workers for this new epoch. + quit_worker_ = false; + + AT_ASSERT(running_preloaders_ == 0); + running_preloaders_ = options_.preloader_count(); + for (const auto i : c10::irange(options_.preloader_count())) { + preload_threads_.emplace_back([this, i]() { this->preloader(i); }); + } + } + + /// size is not used for chunk dataset. + std::optional size() const override { + return std::nullopt; + } + + // provide a references to chunk sampler. Used mainly in distributed data + // loading to set the epoch number for the sampler. + ChunkSamplerType& chunk_sampler() { + return chunk_sampler_; + } + + void save(serialize::OutputArchive& archive) const override { + std::lock_guard lock(chunk_index_guard_); + chunk_sampler_.save(archive); + } + + void load(serialize::InputArchive& archive) override { + std::lock_guard lock(chunk_index_guard_); + chunk_sampler_.load(archive); + load_checkpoint_ = true; + } + + private: + /// running on worker thread to preload chunk data. + void preloader(size_t id) { + while (!quit_worker_.load()) { + try { + std::vector chunk_idx; + { + std::lock_guard lock(chunk_index_guard_); + if (auto chunk_sampler_result = chunk_sampler_.next( + this->options_.cross_chunk_shuffle_count())) { + chunk_idx = chunk_sampler_result.value(); + } else { + break; + } + } + UnwrappedBatchType data = chunk_reader_.read_chunk(chunk_idx[0]); + for (const auto i : c10::irange(1, chunk_idx.size())) { + auto chunk_data = chunk_reader_.read_chunk(chunk_idx[i]); + std::move( + chunk_data.begin(), chunk_data.end(), std::back_inserter(data)); + } + if (preprocessing_policy_) { + preprocessing_policy_(data); + } + if (!data.empty()) { // skip empty chunks. + batch_buffer_->add_chunk_data(std::move(data)); + } + } catch (...) { + batch_buffer_->add_chunk_data(std::current_exception()); + } + } + AT_ASSERT(running_preloaders_.load() > 0); + --running_preloaders_; + if (running_preloaders_.load() == 0) { + // all preloaders are completed, so we can notify the batch_buffer. + batch_buffer_->stop(); + } + } + + /// Block the current thread until the workers finish execution and exit. + void free_workers() { + if (!quit_worker_.load()) { + quit_worker_ = true; + for (auto& worker_thread : preload_threads_) { + worker_thread.join(); + } + } + } + + private: + // Templated class that defines what is a chunk and how to read chunk data. + // When a chunk is returned by chunk_reader_, ChunkDataset split it into + // batches and caches them in batch_buffer_. + ChunkReader chunk_reader_; + + // chunk sampler to shuffle different chunks + ChunkSamplerType chunk_sampler_; + + // example sampler to shuffle examples in a specific chunk + ExampleSamplerType example_sampler_; + + // batch data buffer which holds chunk data from preloading thread. + std::shared_ptr< + detail::BatchDataBuffer> + batch_buffer_; + + // worker thread pool + std::vector preload_threads_; + + /// The options the Dataset was configured with. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ChunkDatasetOptions options_; + + // function pointer wrapper to apply custom processing over chunk data. This + // is considered an advanced parameter for developers who want to apply a + // pre-process to the chunk data before sampling into minibatch. + // Different than the collate function, this policy is applied on the chunk + // level, instead of minibatch level. When a chunk of data is loaded (multiple + // chunks if cross_chunk_shuffle_count_ is greater than 1), this policy is + // applied to the full loaded data. It is useful if developers want to + // perform pre-processing (like bucketing) to the chunk data before + // example sampler samples the data. By default it's an empty pointer and no + // action will be taken. + std::function preprocessing_policy_; + + // indicate whether the worker thread can be teared down + std::atomic quit_worker_; + + // keep track of running preloaders to notify batch buffer. A value 0 + // indicates that the chunk loading is completed. + std::atomic running_preloaders_; + + // mutex to synchronize chunk sampler next() call. + mutable std::mutex chunk_index_guard_; + + // boolean value to indicate whether we need to load the checkpoint for + // chunk_sampler_. + bool load_checkpoint_{false}; +}; +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/map.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/map.h new file mode 100644 index 0000000000000000000000000000000000000000..7f763b199d610139b62c55abdfd908472dd806c6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/map.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include +#include + +namespace torch::data::datasets { +namespace detail { +template +using optional_if_t = std::conditional_t, T>; +} // namespace detail + +/// A `MapDataset` is a dataset that applies a transform to a source dataset. +template +class MapDataset : public BatchDataset< + MapDataset, + detail::optional_if_t< + SourceDataset::is_stateful, + typename AppliedTransform::OutputBatchType>, + typename SourceDataset::BatchRequestType> { + public: + using DatasetType = SourceDataset; + using TransformType = AppliedTransform; + using BatchRequestType = typename SourceDataset::BatchRequestType; + using OutputBatchType = detail::optional_if_t< + SourceDataset::is_stateful, + typename AppliedTransform::OutputBatchType>; + + MapDataset(DatasetType dataset, TransformType transform) + : dataset_(std::move(dataset)), transform_(std::move(transform)) {} + + /// Gets a batch from the source dataset and applies the transform to it, + /// returning the result. + OutputBatchType get_batch(BatchRequestType indices) override { + return get_batch_impl(std::move(indices)); + } + + /// Returns the size of the source dataset. + // NOLINTNEXTLINE(bugprone-exception-escape) + std::optional size() const noexcept override { + return dataset_.size(); + } + + /// Calls `reset()` on the underlying dataset. + /// NOTE: Stateless datasets do not have a reset() method, so a call to this + /// method will only compile for stateful datasets (which have a reset() + /// method). + void reset() { + dataset_.reset(); + } + + /// Returns the underlying dataset. + const SourceDataset& dataset() noexcept { + return dataset_; + } + + /// Returns the transform being applied. + const AppliedTransform& transform() noexcept { + return transform_; + } + + private: + /// The implementation of `get_batch()` for the stateless case, which simply + /// applies the transform to the output of `get_batch()` from the dataset. + template < + typename D = SourceDataset, + typename = std::enable_if_t> + OutputBatchType get_batch_impl(BatchRequestType indices) { + return transform_.apply_batch(dataset_.get_batch(std::move(indices))); + } + + /// The implementation of `get_batch()` for the stateful case. Here, we follow + /// the semantics of `Optional.map()` in many functional languages, which + /// applies a transformation to the optional's content when the optional + /// contains a value, and returns a new optional (of a different type) if the + /// original optional returned by `get_batch()` was empty. + template + std::enable_if_t get_batch_impl( + BatchRequestType indices) { + if (auto batch = dataset_.get_batch(std::move(indices))) { + return transform_.apply_batch(std::move(*batch)); + } + return std::nullopt; + } + + /// The underlying dataset being transformed. + SourceDataset dataset_; + + // The transformation that is applied to batches received from the dataset. + AppliedTransform transform_; +}; + +/// Creates a `MapDataset` with the given dataset and transform. +template +MapDataset map( + DatasetType dataset, + TransformType transform) { + static_assert( + std::is_same_v< + std::conditional_t< + DatasetType::is_stateful, + typename DatasetType::BatchType::value_type, + typename DatasetType::BatchType>, + typename TransformType::InputBatchType>, + "BatchType type of dataset does not match input type of transform"); + return {std::move(dataset), std::move(transform)}; +} + +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/mnist.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/mnist.h new file mode 100644 index 0000000000000000000000000000000000000000..1e55d9ed51d5c15c4780b50832aee8cc1a72f721 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/mnist.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#include +#include + +namespace torch::data::datasets { +/// The MNIST dataset. +class TORCH_API MNIST : public Dataset { + public: + /// The mode in which the dataset is loaded. + enum class Mode { kTrain, kTest }; + + /// Loads the MNIST dataset from the `root` path. + /// + /// The supplied `root` path should contain the *content* of the unzipped + /// MNIST dataset, available from http://yann.lecun.com/exdb/mnist. + explicit MNIST(const std::string& root, Mode mode = Mode::kTrain); + + /// Returns the `Example` at the given `index`. + Example<> get(size_t index) override; + + /// Returns the size of the dataset. + std::optional size() const override; + + /// Returns true if this is the training subset of MNIST. + // NOLINTNEXTLINE(bugprone-exception-escape) + bool is_train() const noexcept; + + /// Returns all images stacked into a single tensor. + const Tensor& images() const; + + /// Returns all targets stacked into a single tensor. + const Tensor& targets() const; + + private: + Tensor images_, targets_; +}; +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/shared.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/shared.h new file mode 100644 index 0000000000000000000000000000000000000000..ee516362464826e3a6b4640ceb1dbf1487ed2aec --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/shared.h @@ -0,0 +1,84 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::data::datasets { + +/// A dataset that wraps another dataset in a shared pointer and implements the +/// `BatchDataset` API, delegating all calls to the shared instance. This is +/// useful when you want all worker threads in the dataloader to access the same +/// dataset instance. The dataset must take care of synchronization and +/// thread-safe access itself. +/// +/// Use `torch::data::datasets::make_shared_dataset()` to create a new +/// `SharedBatchDataset` like you would a `std::shared_ptr`. +template +class SharedBatchDataset : public BatchDataset< + SharedBatchDataset, + typename UnderlyingDataset::BatchType, + typename UnderlyingDataset::BatchRequestType> { + public: + using BatchType = typename UnderlyingDataset::BatchType; + using BatchRequestType = typename UnderlyingDataset::BatchRequestType; + + /// Constructs a new `SharedBatchDataset` from a `shared_ptr` to the + /// `UnderlyingDataset`. + /* implicit */ SharedBatchDataset( + std::shared_ptr shared_dataset) + : dataset_(std::move(shared_dataset)) {} + + /// Calls `get_batch` on the underlying dataset. + BatchType get_batch(BatchRequestType request) override { + return dataset_->get_batch(std::move(request)); + } + + /// Returns the `size` from the underlying dataset. + std::optional size() const override { + return dataset_->size(); + } + + /// Accesses the underlying dataset. + UnderlyingDataset& operator*() { + return *dataset_; + } + + /// Accesses the underlying dataset. + const UnderlyingDataset& operator*() const { + return *dataset_; + } + + /// Accesses the underlying dataset. + UnderlyingDataset* operator->() { + return dataset_.get(); + } + + /// Accesses the underlying dataset. + const UnderlyingDataset* operator->() const { + return dataset_.get(); + } + + /// Calls `reset()` on the underlying dataset. + void reset() { + dataset_->reset(); + } + + private: + std::shared_ptr dataset_; +}; + +/// Constructs a new `SharedBatchDataset` by creating a +/// `shared_ptr`. All arguments are forwarded to +/// `make_shared`. +template +SharedBatchDataset make_shared_dataset(Args&&... args) { + return std::make_shared(std::forward(args)...); +} +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/stateful.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/stateful.h new file mode 100644 index 0000000000000000000000000000000000000000..23720e40db66a4bbf4fa73a2e1ae8707b534e7ff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/stateful.h @@ -0,0 +1,69 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::datasets { + +/// A stateful dataset is a dataset that maintains some internal state, which +/// will be `reset()` at the beginning of each epoch. Subclasses can override +/// the `reset()` method to configure this behavior. Further, the return type of +/// a stateful dataset's `get_batch()` method is always an `optional`. When the +/// stateful dataset wants to indicate to the dataloader that its epoch has +/// ended, it should return an empty optional. The dataloader knows to modify +/// its implementation based on whether the dataset is stateless or stateful. +/// +/// Note that when subclassing a from `StatefulDataset`, the return +/// type of `get_batch()`, which the subclass must override, will be +/// `optional` (i.e. the type specified in the `StatefulDataset` +/// specialization is automatically boxed into an `optional` for the dataset's +/// `BatchType`). +template < + typename Self, + typename Batch = std::vector>, + typename BatchRequest = size_t> +class StatefulDataset + : public BatchDataset, BatchRequest> { + public: + /// Resets internal state of the dataset. + virtual void reset() = 0; + + /// Saves the statefulDataset's state to OutputArchive. + virtual void save(serialize::OutputArchive& archive) const = 0; + + /// Deserializes the statefulDataset's state from the `archive`. + virtual void load(serialize::InputArchive& archive) = 0; +}; + +/// Serializes a statefulDataset to `OutputArchive`. +template +serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const StatefulDataset& statefulDataset) { + statefulDataset.save(archive); + return archive; +} + +/// Deserializes a statefulDataset from an `InputArchive`. +template +serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + StatefulDataset& statefulDataset) { + statefulDataset.load(archive); + return archive; +} + +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/tensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..213be9b6e45c4bd20c8adceff8676f6843772c60 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/tensor.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::data::datasets { + +/// A dataset of tensors. +/// Stores a single tensor internally, which is then indexed inside `get()`. +struct TensorDataset : public Dataset { + /// Creates a `TensorDataset` from a vector of tensors. + explicit TensorDataset(const std::vector& tensors) + : TensorDataset(torch::stack(tensors)) {} + + explicit TensorDataset(torch::Tensor tensor) : tensor(std::move(tensor)) {} + + /// Returns a single `TensorExample`. + TensorExample get(size_t index) override { + return tensor[static_cast(index)]; + } + + /// Returns the number of tensors in the dataset. + std::optional size() const override { + return tensor.size(0); + } + + Tensor tensor; +}; + +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/data_shuttle.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/data_shuttle.h new file mode 100644 index 0000000000000000000000000000000000000000..433ed49aab5bf2506e4fcea421f683f8c2fb5e12 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/data_shuttle.h @@ -0,0 +1,88 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +#include +#include + +namespace torch::data::detail { + +/// Encapsulates the full life cycle of DataLoader jobs. +/// +/// When a new job is enqueued to the `DataShuttle`, a counter for in-flight +/// jobs is bumped. This job is said to be "in-flight" until its result is +/// popped. Worker threads dequeue jobs as soon as they are available. When a +/// worker finishes a job, it enqueues the result. Only when the main thread +/// dequeues a result is the count of in-flight jobs decremented. When the main +/// thread attempts to dequeue a job but no jobs are in-flight, that means the +/// epoch is complete and `pop_result` returns an empty optional. +template +class DataShuttle { + public: + /// Pushes a new job. Called by the main thread. + void push_job(Job job) { + new_jobs_.push(std::move(job)); + ++in_flight_jobs_; + } + + /// Pushes the result of a job. Called by worker threads. + void push_result(Result result) { + results_.push(std::move(result)); + } + + /// Returns the next job, blocking until there is one available. Called by + /// worker threads. + Job pop_job() { + return new_jobs_.pop(); + } + + /// Returns the result of a job, or nullopt if all jobs were exhausted. Called + /// by the main thread. + std::optional pop_result( + std::optional timeout = std::nullopt) { + if (in_flight_jobs_ > 0) { + auto result = results_.pop(timeout); + --in_flight_jobs_; + return result; + } + return std::nullopt; + } + + /// Discards any jobs that are not yet in flight, and waits for all in-flight + /// jobs to finish, discarding their result. + void drain() { + // Clear all inputs so that no further jobs are scheduled. + auto number_cleared = new_jobs_.clear(); + in_flight_jobs_ -= number_cleared; + // Remove any outstanding results. + while (in_flight_jobs_ > 0) { + pop_result(); + } + } + + /// Returns the number of jobs that are still in progress. + /// When this number is zero, an epoch is finished. + size_t in_flight_jobs() const noexcept { + return in_flight_jobs_; + } + + private: + /// The queue for jobs that are not yet in flight. + Queue new_jobs_; + /// The number of in-flight jobs. + /// NOTE: Not atomic because only manipulated by the main thread. + size_t in_flight_jobs_ = 0; + /// The queue for results of finished jobs. + Queue results_; +}; + +} // namespace torch::data::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/queue.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/queue.h new file mode 100644 index 0000000000000000000000000000000000000000..d369014923cbacf1d0f3a98ef830e864f1b852ee --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/queue.h @@ -0,0 +1,85 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include + +namespace torch::data::detail { + +/// A basic locked, blocking MPMC queue. +/// +/// Every `push` and `pop` is guarded by a mutex. A condition variable is used +/// to communicate insertion of new elements, such that waiting threads will be +/// woken up if they are currently waiting inside a call to `pop()`. +/// +/// Note that this data structure is written specifically for use with the +/// `DataLoader`. Its behavior is tailored to this use case and may not be +/// applicable to more general uses. +template +class Queue { + public: + /// Pushes a new value to the back of the `Queue` and notifies one thread on + /// the waiting side about this event. + void push(T value) { + { + std::lock_guard lock(mutex_); + queue_.push(std::move(value)); + } + cv_.notify_one(); + } + + /// Blocks until at least one element is ready to be popped from the front of + /// the queue. An optional `timeout` in seconds can be used to limit the time + /// spent waiting for an element. If the wait times out, an exception is + /// raised. + T pop(std::optional timeout = std::nullopt) { + std::unique_lock lock(mutex_); + if (timeout) { + if (!cv_.wait_for( + lock, *timeout, [this] { return !this->queue_.empty(); })) { + // clang-format off + TORCH_CHECK(false, + "Timeout in DataLoader queue while waiting for next batch" + " (timeout was ", timeout->count(), " ms)"); + // clang-format on + } + } else { + cv_.wait(lock, [this] { return !this->queue_.empty(); }); + } + AT_ASSERT(!queue_.empty()); + T value = queue_.front(); + queue_.pop(); + lock.unlock(); + return value; + } + + /// Empties the queue and returns the number of elements that were present at + /// the start of the function. No threads are notified about this event as it + /// is assumed to be used to drain the queue during shutdown of a + /// `DataLoader`. + size_t clear() { + std::lock_guard lock(this->mutex_); + const auto size = queue_.size(); + while (!queue_.empty()) { + queue_.pop(); + } + return size; + } + + private: + std::queue queue_; + std::mutex mutex_; + std::condition_variable cv_; +}; +} // namespace torch::data::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/sequencers.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/sequencers.h new file mode 100644 index 0000000000000000000000000000000000000000..02aa79dc20b234a2fa97055a10a9626f7214aa7b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/sequencers.h @@ -0,0 +1,112 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace torch::data::detail::sequencers { +namespace detail { +template +bool buffer_contains_result(const std::vector>& buffer) { + return std::any_of( + buffer.begin(), buffer.end(), [](const std::optional& result) { + return result.has_value(); + }); +} +} // namespace detail + +/// A `Sequencer` accepts a function that yields the next result of a +/// `DataLoader` and then has the opportunity to influence the order in which +/// these results are returned. The `NoSequencer` does not enforce any +/// sequencing and returns any result directly. The `OrderedSequencer` instead +/// buffers results internally to return them in order of their sequence number. +template +struct Sequencer { + using ResultProducer = std::function()>; + virtual ~Sequencer() = default; + virtual std::optional next(ResultProducer next_result) = 0; +}; + +/// A `Sequencer` that does not enforce any ordering. It is effectively the +/// identity function. +template +struct NoSequencer final : public Sequencer { + using typename Sequencer::ResultProducer; + std::optional next(ResultProducer next_result) override { + return next_result(); + } +}; + +/// A `Sequencer` that buffers results and returns them in order of their +/// sequence number. The `OrderedSequencer` maintains an internal, monotonically +/// incrementing counter for the next sequence number it expects. If it receives +/// a result with a higher sequence number, it will buffer it for later (when +/// the sequence number reaches that of this result). Otherwise, if the sequence +/// numbers match, the result is returned. +/// +/// Implementation note: The `OrderedSequencer` is implemented with a fixed-size +/// buffer. Let `m` be the maximum number of jobs in the data loader's queue and +/// `s` be the current sequence number. Assume `m` jobs are scheduled in the +/// `DataLoader`. Any new result is stored at index `job.sqn mod m` in the +/// `OrderedSequencer`. Why are we sure sequence numbers of new jobs will not +/// collide with sequence numbers of buffered jobs? The `OrderedSequencer` will +/// not return from `next()` until it receives the result with sqn `s`. This +/// means no new jobs can be scheduled in the `DataLoader` in the meantime, +/// which enforces that as long as sqn `s` has not been received, `s + m` (which +/// would cause a collision in the fixed-size buffer) will not yet be scheduled. +template +struct OrderedSequencer : public Sequencer { + using typename Sequencer::ResultProducer; + + /// Constructs the `OrderedSequencer` with the maximum number of results it + /// will ever hold at one point in time. + explicit OrderedSequencer(size_t max_jobs) : buffer_(max_jobs) {} + + /// Buffers results until the next one in the expected order is received. + std::optional next(ResultProducer next_result) override { + // If we already have the result for the next sqn, return it. + if (auto& maybe_result = buffer(next_sequence_number_)) { + auto result = std::move(*maybe_result); + buffer(next_sequence_number_++).reset(); + return result; + } + // Otherwise wait for the next result. + while (true) { + auto result = next_result(); + if (!result) { + AT_ASSERT(!detail::buffer_contains_result(buffer_)); + break; + } + // If it was not nullopt and the sequence numbers match, return it + // directly and bump the sequence number. + if (result->sequence_number == next_sequence_number_) { + ++next_sequence_number_; + return result; + } + // Stash the result for later. + AT_ASSERT(!buffer(result->sequence_number).has_value()); + buffer(result->sequence_number) = std::move(result); + } + // The result was an empty optional, so we are done with this epoch. + return std::nullopt; + } + + /// Accesses the buffer at the `index` modulo the buffer size. + std::optional& buffer(size_t index) { + return buffer_.at(index % buffer_.size()); + } + + /// The monotonically increasing sequence number we expect. + size_t next_sequence_number_ = 0; + + /// A fixed-size buffer (after construction). + std::vector> buffer_; +}; +} // namespace torch::data::detail::sequencers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/base.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/base.h new file mode 100644 index 0000000000000000000000000000000000000000..9d253649072d50dcae35dd67d40c08d7716b518b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/base.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { +/// A `Sampler` is an object that yields an index with which to access a +/// dataset. +template > +class Sampler { + public: + using BatchRequestType = BatchRequest; + + virtual ~Sampler() = default; + + /// Resets the `Sampler`'s internal state. + /// Typically called before a new epoch. + /// Optionally, accepts a new size when resetting the sampler. + virtual void reset(std::optional new_size) = 0; + + /// Returns the next index if possible, or an empty optional if the + /// sampler is exhausted for this epoch. + virtual std::optional next(size_t batch_size) = 0; + + /// Serializes the `Sampler` to the `archive`. + virtual void save(serialize::OutputArchive& archive) const = 0; + + /// Deserializes the `Sampler` from the `archive`. + virtual void load(serialize::InputArchive& archive) = 0; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/custom_batch_request.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/custom_batch_request.h new file mode 100644 index 0000000000000000000000000000000000000000..637ef93b2902905cb9d6125af973345993838dc7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/custom_batch_request.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::data::samplers { +/// A base class for custom index types. +struct TORCH_API CustomBatchRequest { + CustomBatchRequest() = default; + CustomBatchRequest(const CustomBatchRequest&) = default; + CustomBatchRequest(CustomBatchRequest&&) noexcept = default; + virtual ~CustomBatchRequest() = default; + + /// The number of elements accessed by this index. + virtual size_t size() const = 0; +}; +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/distributed.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/distributed.h new file mode 100644 index 0000000000000000000000000000000000000000..1435621eb41c1f776e3e607d45bfa320a471e795 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/distributed.h @@ -0,0 +1,138 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A `Sampler` that selects a subset of indices to sample from and defines a +/// sampling behavior. In a distributed setting, this selects a subset of the +/// indices depending on the provided num_replicas and rank parameters. The +/// `Sampler` performs a rounding operation based on the `allow_duplicates` +/// parameter to decide the local sample count. +template > +class DistributedSampler : public Sampler { + public: + DistributedSampler( + size_t size, + size_t num_replicas = 1, + size_t rank = 0, + bool allow_duplicates = true) + : size_(size), + num_replicas_(num_replicas), + rank_(rank), + + allow_duplicates_(allow_duplicates) {} + + /// Set the epoch for the current enumeration. This can be used to alter the + /// sample selection and shuffling behavior. + void set_epoch(size_t epoch) { + epoch_ = epoch; + } + + size_t epoch() const { + return epoch_; + } + + protected: + size_t local_sample_count() { + if (allow_duplicates_) { + return (size_ + num_replicas_ - 1) / num_replicas_; + } else { + return size_ / num_replicas_; + } + } + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t size_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t num_replicas_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t rank_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t epoch_{0}; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool allow_duplicates_; +}; + +/// Select samples randomly. The sampling order is shuffled at each `reset()` +/// call. +class TORCH_API DistributedRandomSampler : public DistributedSampler<> { + public: + DistributedRandomSampler( + size_t size, + size_t num_replicas = 1, + size_t rank = 0, + bool allow_duplicates = true); + + /// Resets the `DistributedRandomSampler` to a new set of indices. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `DistributedRandomSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `DistributedRandomSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `DistributedRandomSampler`. + size_t index() const noexcept; + + private: + void populate_indices(); + + size_t begin_index_; + size_t end_index_; + size_t sample_index_; + std::vector all_indices_; +}; + +/// Select samples sequentially. +class TORCH_API DistributedSequentialSampler : public DistributedSampler<> { + public: + DistributedSequentialSampler( + size_t size, + size_t num_replicas = 1, + size_t rank = 0, + bool allow_duplicates = true); + + /// Resets the `DistributedSequentialSampler` to a new set of indices. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `DistributedSequentialSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `DistributedSequentialSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `DistributedSequentialSampler`. + size_t index() const noexcept; + + private: + void populate_indices(); + + size_t begin_index_; + size_t end_index_; + size_t sample_index_; + std::vector all_indices_; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/random.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/random.h new file mode 100644 index 0000000000000000000000000000000000000000..a671b92c54fb1687527c5a6a781ef93c2bf5ad18 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/random.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A `Sampler` that returns random indices. +class TORCH_API RandomSampler : public Sampler<> { + public: + /// Constructs a `RandomSampler` with a size and dtype for the stored indices. + /// + /// The constructor will eagerly allocate all required indices, which is the + /// sequence `0 ... size - 1`. `index_dtype` is the data type of the stored + /// indices. You can change it to influence memory usage. + explicit RandomSampler(int64_t size, Dtype index_dtype = torch::kInt64); + + ~RandomSampler() override; + + /// Resets the `RandomSampler` to a new set of indices. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `RandomSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `RandomSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `RandomSampler`. + size_t index() const noexcept; + + private: + at::Tensor indices_; + int64_t index_ = 0; +}; +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/sequential.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/sequential.h new file mode 100644 index 0000000000000000000000000000000000000000..ed2a5c936da892a4a354546dc696d5ed075023e8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/sequential.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A `Sampler` that returns indices sequentially. +class TORCH_API SequentialSampler : public Sampler<> { + public: + /// Creates a `SequentialSampler` that will return indices in the range + /// `0...size - 1`. + explicit SequentialSampler(size_t size); + + /// Resets the `SequentialSampler` to zero. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `SequentialSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `SequentialSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `SequentialSampler`. + size_t index() const noexcept; + + private: + size_t size_; + size_t index_{0}; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/serialize.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/serialize.h new file mode 100644 index 0000000000000000000000000000000000000000..bc1bed5f38dc3ae924119ee44db9c8e2f1c2a997 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/serialize.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::data::samplers { +/// Serializes a `Sampler` into an `OutputArchive`. +template +serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const Sampler& sampler) { + sampler.save(archive); + return archive; +} + +/// Deserializes a `Sampler` from an `InputArchive`. +template +serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + Sampler& sampler) { + sampler.load(archive); + return archive; +} +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/stream.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/stream.h new file mode 100644 index 0000000000000000000000000000000000000000..c0ff4614aefe85516fac03f88cbd9b2d622abb83 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/stream.h @@ -0,0 +1,62 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace torch::serialize { +class InputArchive; +class OutputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A wrapper around a batch size value, which implements the +/// `CustomBatchRequest` interface. +struct TORCH_API BatchSize : public CustomBatchRequest { + explicit BatchSize(size_t size); + size_t size() const noexcept override; + operator size_t() const noexcept; + size_t size_; +}; + +/// A sampler for (potentially infinite) streams of data. +/// +/// The major feature of the `StreamSampler` is that it does not return +/// particular indices, but instead only the number of elements to fetch from +/// the dataset. The dataset has to decide how to produce those elements. +class TORCH_API StreamSampler : public Sampler { + public: + /// Constructs the `StreamSampler` with the number of individual examples that + /// should be fetched until the sampler is exhausted. + explicit StreamSampler(size_t epoch_size); + + /// Resets the internal state of the sampler. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns a `BatchSize` object with the number of elements to fetch in the + /// next batch. This number is the minimum of the supplied `batch_size` and + /// the difference between the `epoch_size` and the current index. If the + /// `epoch_size` has been reached, returns an empty optional. + std::optional next(size_t batch_size) override; + + /// Serializes the `StreamSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `StreamSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + private: + size_t examples_retrieved_so_far_ = 0; + size_t epoch_size_; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/base.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/base.h new file mode 100644 index 0000000000000000000000000000000000000000..1681070363cecfe3685c45b31626ebdf6a6e8812 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/base.h @@ -0,0 +1,54 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::data::transforms { + +/// A transformation of a batch to a new batch. +template +class BatchTransform { + public: + using InputBatchType = InputBatch; + using OutputBatchType = OutputBatch; + + virtual ~BatchTransform() = default; + + /// Applies the transformation to the given `input_batch`. + virtual OutputBatch apply_batch(InputBatch input_batch) = 0; +}; + +/// A transformation of individual input examples to individual output examples. +/// +/// Just like a `Dataset` is a `BatchDataset`, a `Transform` is a +/// `BatchTransform` that can operate on the level of individual examples rather +/// than entire batches. The batch-level transform is implemented (by default) +/// in terms of the example-level transform, though this can be customized. +template +class Transform + : public BatchTransform, std::vector> { + public: + using InputType = Input; + using OutputType = Output; + + /// Applies the transformation to the given `input`. + virtual OutputType apply(InputType input) = 0; + + /// Applies the `transformation` over the entire `input_batch`. + std::vector apply_batch(std::vector input_batch) override { + std::vector output_batch; + output_batch.reserve(input_batch.size()); + for (auto&& input : input_batch) { + output_batch.push_back(apply(std::move(input))); + } + return output_batch; + } +}; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/collate.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/collate.h new file mode 100644 index 0000000000000000000000000000000000000000..10aa60d70be40ed8b06c88318e94ad28fd274c88 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/collate.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::data::transforms { + +/// A `Collation` is a transform that reduces a batch into a single value. +/// The result is a `BatchDataset` that has the type of the single value as its +/// `BatchType`. +template > +using Collation = BatchTransform; + +/// A `Collate` allows passing a custom function to reduce/collate a batch +/// into a single value. It's effectively the lambda version of `Collation`, +/// which you could subclass and override `operator()` to achieve the same. +/// +/// \rst +/// .. code-block:: cpp +/// using namespace torch::data; +/// +/// auto dataset = datasets::MNIST("path/to/mnist") +/// .map(transforms::Collate>([](std::vector> e) { +/// return std::move(e.front()); +/// })); +/// \endrst +template > +using Collate = BatchLambda; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/lambda.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/lambda.h new file mode 100644 index 0000000000000000000000000000000000000000..9bb9e3e986224f0618cdfe5cf65a9f5896b765df --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/lambda.h @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace torch::data::transforms { + +/// A `BatchTransform` that applies a user-provided functor to a batch. +template +class BatchLambda : public BatchTransform { + public: + using typename BatchTransform::InputBatchType; + using typename BatchTransform::OutputBatchType; + using FunctionType = std::function; + + /// Constructs the `BatchLambda` from the given `function` object. + explicit BatchLambda(FunctionType function) + : function_(std::move(function)) {} + + /// Applies the user-provided function object to the `input_batch`. + OutputBatchType apply_batch(InputBatchType input_batch) override { + return function_(std::move(input_batch)); + } + + private: + FunctionType function_; +}; + +// A `Transform` that applies a user-provided functor to individual examples. +template +class Lambda : public Transform { + public: + using typename Transform::InputType; + using typename Transform::OutputType; + using FunctionType = std::function; + + /// Constructs the `Lambda` from the given `function` object. + explicit Lambda(FunctionType function) : function_(std::move(function)) {} + + /// Applies the user-provided function object to the `input`. + OutputType apply(InputType input) override { + return function_(std::move(input)); + } + + private: + FunctionType function_; +}; + +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/stack.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/stack.h new file mode 100644 index 0000000000000000000000000000000000000000..c0f5db9b43ba3d41ddb64dbb4482aefe6c774d77 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/stack.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::data::transforms { + +template > +struct Stack; + +/// A `Collation` for `Example` types that stacks all data +/// tensors into one tensor, and all target (label) tensors into one tensor. +template <> +struct Stack> : public Collation> { + Example<> apply_batch(std::vector> examples) override { + std::vector data, targets; + data.reserve(examples.size()); + targets.reserve(examples.size()); + for (auto& example : examples) { + data.push_back(std::move(example.data)); + targets.push_back(std::move(example.target)); + } + return {torch::stack(data), torch::stack(targets)}; + } +}; + +/// A `Collation` for `Example` types that stacks all data +/// tensors into one tensor. +template <> +struct Stack + : public Collation> { + TensorExample apply_batch(std::vector examples) override { + std::vector data; + data.reserve(examples.size()); + for (auto& example : examples) { + data.push_back(std::move(example.data)); + } + return torch::stack(data); + } +}; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/tensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..cba46ead0d39671b9358d78d22c639a8585ffcf4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/tensor.h @@ -0,0 +1,78 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::data::transforms { + +/// A `Transform` that is specialized for the typical `Example` +/// combination. It exposes a single `operator()` interface hook (for +/// subclasses), and calls this function on input `Example` objects. +template +class TensorTransform + : public Transform, Example> { + public: + using E = Example; + using typename Transform::InputType; + using typename Transform::OutputType; + + /// Transforms a single input tensor to an output tensor. + virtual Tensor operator()(Tensor input) = 0; + + /// Implementation of `Transform::apply` that calls `operator()`. + OutputType apply(InputType input) override { + input.data = (*this)(std::move(input.data)); + return input; + } +}; + +/// A `Lambda` specialized for the typical `Example` input type. +template +class TensorLambda : public TensorTransform { + public: + using FunctionType = std::function; + + /// Creates a `TensorLambda` from the given `function`. + explicit TensorLambda(FunctionType function) + : function_(std::move(function)) {} + + /// Applies the user-provided functor to the input tensor. + Tensor operator()(Tensor input) override { + return function_(std::move(input)); + } + + private: + FunctionType function_; +}; + +/// Normalizes input tensors by subtracting the supplied mean and dividing by +/// the given standard deviation. +template +struct Normalize : public TensorTransform { + /// Constructs a `Normalize` transform. The mean and standard deviation can be + /// anything that is broadcastable over the input tensors (like single + /// scalars). + Normalize(ArrayRef mean, ArrayRef stddev) + : mean(torch::tensor(mean, torch::kFloat32) + .unsqueeze(/*dim=*/1) + .unsqueeze(/*dim=*/2)), + stddev(torch::tensor(stddev, torch::kFloat32) + .unsqueeze(/*dim=*/1) + .unsqueeze(/*dim=*/2)) {} + + torch::Tensor operator()(Tensor input) override { + return input.sub(mean).div(stddev); + } + + torch::Tensor mean, stddev; +}; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterdict.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterdict.h new file mode 100644 index 0000000000000000000000000000000000000000..b2464e930cf6d35442ce2ae4f7fbb3fa92772f58 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterdict.h @@ -0,0 +1,151 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::nn { + +class ParameterDictImpl : public Cloneable { + public: + using Iterator = OrderedDict::Iterator; + using ConstIterator = OrderedDict::ConstIterator; + + ParameterDictImpl() = default; + + explicit ParameterDictImpl( + const torch::OrderedDict& params) { + parameters_ = params; + } + + /// `reset()` is empty for `ParameterDict`, since it does not have + /// parameters of its own. + void reset() override {} + + /// Pretty prints the `ParameterDict` module into the given `stream`. + void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::ParameterDict(" << '\n'; + for (const auto& pair : parameters_) { + stream << '(' << pair.key() << ')' << ": Parameter containing: [" + << pair.value().scalar_type() << " of size " + << pair.value().sizes() << ']'; + ; + stream << '\n'; + } + stream << ')'; + } + + /// Insert the parameter along with the key into ParameterDict + /// The parameter is set to be require grad by default + Tensor& insert(const std::string& key, const Tensor& param) { + bool requires_grad = param.requires_grad(); + return register_parameter(key, param, requires_grad); + } + + /// Remove key from the ParameterDict and return its value, throw exception + /// if the key is not contained. Please check contains(key) before for a + /// non-throwing access. + Tensor pop(const std::string& key) { + torch::Tensor v = parameters_[key]; + parameters_.erase(key); + return v; + } + + /// Return the keys in the dict + ::std::vector keys() const { + return parameters_.keys(); + } + + /// Return the Values in the dict + ::std::vector values() const { + return parameters_.values(); + } + + /// Return an iterator to the start of ParameterDict + Iterator begin() { + return parameters_.begin(); + } + + /// Return a const iterator to the start of ParameterDict + ConstIterator begin() const { + return parameters_.begin(); + } + + /// Return an iterator to the end of ParameterDict + Iterator end() { + return parameters_.end(); + } + + /// Return a const iterator to the end of ParameterDict + ConstIterator end() const { + return parameters_.end(); + } + + /// Return the number of items currently stored in the ParameterDict + size_t size() const noexcept { + return parameters_.size(); + } + + /// Return true if the ParameterDict is empty, otherwise return false + bool empty() const noexcept { + return parameters_.is_empty(); + } + + /// Update the ParameterDict with the key-value pairs from + /// another ParameterDict, overwriting existing key + template + void update(const Container& container) { + for (auto& item : container) { + parameters_[item.key()] = item.value(); + } + } + + /// Remove all parameters in the ParameterDict + void clear() { + parameters_.clear(); + } + + /// Check if the certain parameter with the key in the ParameterDict + bool contains(const std::string& key) const noexcept { + return parameters_.contains(key); + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + const Tensor& get(const std::string& key) const { + return parameters_[key]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + Tensor& get(const std::string& key) { + return parameters_[key]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + Tensor& operator[](const std::string& key) { + return parameters_[key]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + const Tensor& operator[](const std::string& key) const { + return parameters_[key]; + } +}; + +TORCH_MODULE(ParameterDict); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/autograd.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..8aa0d835c9d52fac0c36d6ed52038dc0b26a9357 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/autograd.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +using torch::autograd::variable_list; + +/// C++ API of Distributed Autograd that kicks off the distributed backward pass +/// using the provided roots. This currently implements the +/// :ref:`fast-mode-algorithm` which assumes all RPC messages sent in the same +/// distributed autograd context across workers would be part of the autograd +/// graph during the backward pass. +/// +/// We use the provided roots to discover the autograd graph and compute +/// appropriate dependencies. This method blocks until the entire +/// autograd computation is done. +/// This function accumulates gradients in the leaves - you might need to zero +/// them before calling it. +/// +/// \param context_id The autograd context id for which we should retrieve the +/// gradients. +/// \param roots Tensors which represent the roots of the autograd computation. +/// All the tensors should be scalars. +/// \param retain_graph If `false`, the graph used to compute the grad will be +/// freed. Note that in nearly all cases setting this +/// option to `true` is not needed and often can be worked +/// around in a much more efficient way. Usually, you need +/// to set this to `true` to run backward multiple times. +TORCH_API void backward( + int64_t context_id, + const variable_list& roots, + bool retain_graph = false); + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/container.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/container.h new file mode 100644 index 0000000000000000000000000000000000000000..c28137cb4fbf2f19118ba4b546d6c612aefaf35c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/container.h @@ -0,0 +1,167 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::distributed::autograd { + +// Singleton class per worker which is responsible for storing the distributed +// autograd context for each autograd pass and also cleans up data for an +// autograd pass once its done. +// +// Each autograd pass is assigned a unique autograd_context_id and all data for +// that pass (DistAutogradContext) is stored in this container indexed by the +// autograd_context_id. The autograd_context_id itself is a 64 bit globally +// unique id. The first 16 bits is the worker_id and the next 48 bits is an +// auto-incrementing id for each worker. +// +// This container is also responsible for maintaining a globally unique message +// id, which is used to associate send/recv autograd function pairs. The format +// is similar to the autograd_context_id where we have a 64 bit integer with +// first 16 bits being the worker id and next 48 bits are auto-incrementing. +class TORCH_API DistAutogradContainer { + public: + explicit DistAutogradContainer(uint32_t num_shards); + + // One time initialization of the container. + static DistAutogradContainer& init(int64_t worker_id); + + // Retrieve the singleton instance of the container, ensures we have + // initialized the container. + static DistAutogradContainer& getInstance(); + + // Create a new context for a distributed autograd pass. + const ContextPtr newContext(); + + // Clean up resources for a given context_id once the autograd pass is done. + // Sends RPC to other workers this worker knows about, telling them to clean + // up their context as well. Throws an exception if the context_id does not + // exist. + void releaseContext(int64_t context_id); + + // Releases an autograd context if it is present on this node. Also sends RPC + // to other workers this worker knows about, telling them to clean up their + // context. Does nothing if it is not present. + void releaseContextIfPresent(int64_t context_id); + + // Checks if the passed in context_id is valid. + void isValidContext(int64_t context_id); + + // Retrieve the autograd context for a given context_id. + ContextPtr retrieveContext(int64_t context_id); + + // Retrieves the currently active autograd context for the current thread. + ContextPtr currentContext(); + + // Checks whether or not the current thread has a valid autograd context. + bool hasValidContext() const; + + // Generate a new autograd_message_id for send/recv autograd functions. + int64_t newAutogradMessageId(); + + // Creates a new autograd context with the provided context_id. If a context + // already exists with the provided context_id, we just return it. + // This does not set the current context for the current thread. + ContextPtr getOrCreateContext(int64_t context_id); + + // Retrieves the maximum possible autograd_context_id/autograd_message_id that + // can be generated by this worker. + int64_t getMaxId(); + + // Retrieves the worker ID for this node + rpc::worker_id_t getWorkerId() const; + + // Can set current context id if there is no valid context yet + static void setCurrentContextId(int64_t contextId); + + // Forcibly sets the thread local current context id. Should only be used in + // cases where you know what you're doing and need to override the thread + // local. Otherwise, use setCurrentContextId instead. + static void forceCurrentContextId(int64_t contextId); + + // Clear current context id + void clearCurrentContext(); + + // Returns the number of autograd contexts in the container. + size_t numAutogradContexts() const; + + // Returns the current thread local context id for this thread. + static int64_t currentContextId(); + + DistAutogradContainer() = delete; + ~DistAutogradContainer() = default; + DistAutogradContainer(const DistAutogradContainer&) = delete; + DistAutogradContainer& operator=(const DistAutogradContainer&) = delete; + DistAutogradContainer(DistAutogradContainer&&) = delete; + DistAutogradContainer& operator=(DistAutogradContainer&&) = delete; + + private: + // Number of shards for the map storing autograd contexts. We'd like this + // to be a power of 2 and we don't expect a value much higher than the + // number of cores would provide much benefit. + static constexpr uint32_t kNumDefaultShards = 128; + + // Use cache line size for alignment. + static constexpr int kCacheLineSize = 64; + + // Structure holding one shard of the sharded autograd context map with its + // associated lock. Align to cache line size to avoid contention between + // adjacent entries. + struct alignas(kCacheLineSize) ContextsShard { + // Lock for this shard. + mutable std::mutex lock; + + // Map storing autograd contexts for this shard. + std::unordered_map contexts; + }; + + static DistAutogradContainer& getInstanceInternal(); + + // Retrieve the shard for given context_id. + ContextsShard& getShard(int64_t context_id); + + // Sends an RPC to the workers that have a context corresponding to passed in + // context_id. This function should be called with the lock. + void sendReleaseContextRpc( + const std::unordered_set& workerIds, + int64_t context_id); + + // Erase context_id from the autograd context map, and reset the thread local + // current context id if it corresponds to the passed in context id. This + // function should be called with the lock. + void eraseContextIdAndReset(ContextsShard& shard, int64_t context_id); + + // Compute the number of shards for the autograd_contexts_ map. + static uint32_t computeNumShards(); + + // Auto incrementing context id used to identify unique autograd passes. + // Initialized with the first 16 bits being the worker_id. + std::atomic next_context_id_; + + // Unique id to identify a worker in the distributed setting. + int16_t worker_id_; + + // Whether or not the container has been initialized appropriately. + bool initialized_; + + // Sharded autograd context map. + std::vector autograd_contexts_; + + // Number of shards for the sharded autograd_contexts_ map. + uint32_t num_shards_; + + // Autograd message id to identify unique send/recv autograd function pairs. + std::atomic next_autograd_message_id_; + + // Maximum allowed value for autograd_context_id or autograd_message_id. + int64_t max_id_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/context.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/context.h new file mode 100644 index 0000000000000000000000000000000000000000..09de23c1154c2c496ba6af372f993e3a9477a310 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/context.h @@ -0,0 +1,176 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +class RecvRpcBackward; + +// DistAutogradContext which stores information for a single distributed +// autograd pass on a worker. +class TORCH_API DistAutogradContext { + public: + using GradCallback = std::function; + + explicit DistAutogradContext(int64_t contextId); + ~DistAutogradContext() = default; + + // Retrieves the autograd context id for this context. + int64_t contextId() const; + + // Records a 'send' autograd function for this context with the provided + // message id. + void addSendFunction( + const std::shared_ptr& func, + int64_t autograd_message_id); + + // Records a 'recv' autograd function for this context with the provided + // message id. + void addRecvFunction( + std::shared_ptr& func, + int64_t autograd_message_id); + + // Given an autograd_message_id, retrieve the appropriate send function. + std::shared_ptr retrieveSendFunction( + int64_t autograd_message_id); + + // Return all send functions for this context. + std::unordered_map> sendFunctions() + const; + + // Return all recv functions for this context. + std::unordered_map> recvFunctions() + const; + + // Adds a future message recording an outstanding RPC. + void addOutstandingRpc(const c10::intrusive_ptr& jitFuture); + + // Returns all gradients. + const c10::Dict getGradients() const; + + // This function gives a mutable grad reference to the callback. + // If the callback returns true, it means the grad in the context + // needs to be updated. + void runGradCallbackForVariable( + const torch::autograd::Variable& variable, + const GradCallback& cb); + + DistAutogradContext(const DistAutogradContext&) = delete; + DistAutogradContext& operator=(const DistAutogradContext&) = delete; + DistAutogradContext(DistAutogradContext&&) = delete; + DistAutogradContext& operator=(DistAutogradContext&&) = delete; + + // records the workerID of a node that we sent an RPC to. + // workerIDs are added here when we attach a send function to this autograd + // context + void addKnownWorkerId(const rpc::worker_id_t workerId); + + // Retrieves a set containing the known workerIds for this context + // These are the different workers that this context has sent RPCs to. + std::unordered_set getKnownWorkerIds() const; + + private: + friend class BackwardPassCleanupGuard; + friend class DistEngine; + friend class RecvRpcBackward; + friend class DistAccumulateGradCaptureHook; + + // Record that we would like to accumulate the provided gradient on the given + // variable. + void accumulateGrad( + const torch::autograd::Variable& variable, + const torch::Tensor& grad, + size_t num_expected_refs); + + // Retrieve the GraphTask. + std::shared_ptr retrieveGraphTask(); + + // Set the appropriate graph task for the backward pass. Can be called only + // once. + void setGraphTask(std::shared_ptr graphTask); + + // Resets the graph task to ensure we can run another distributed backward + // pass for the same autograd context. + void resetGraphTask(); + + // Waits for all outstanding RPCs for this context to finish and clears all + // outstanding rpcs held in this context. This should be called only once. + c10::intrusive_ptr clearAndWaitForOutstandingRpcsAsync(); + + void clearOutstandingRpcs(); + + // Record an event to mark the completion of gradient computation. These + // events will later help to properly synchronize gradients consumptions + // in getGradients(). We need these events because backward and + // optimizer.step are separate RPC calls, and will occur on different CUDA + // streams. Without synchronization, it is possible that gradients are + // consumed before they are ready. + void recordGradEvent(c10::Device device); + + const int64_t contextId_; + + // Set containing known worker IDs, used in cleaning up autograd context. + // Whenever a sendRpcBackward is attached to the autograd graph for this + // context, the destination is added here. + std::unordered_set knownWorkerIds_; + + // Map from autograd_message_id to appropriate 'send' autograd function. + std::unordered_map> + sendAutogradFunctions_; + + // Map from autograd_message_id to appropriate 'recv' autograd function. + std::unordered_map> + recvAutogradFunctions_; + + // Gradients accumulated in this context so far. The key is the variable on + // which the gradient needs to be accumulated and the value is the gradient + // that needs to be accumulated on that variable.. + c10::Dict accumulatedGrads_; + + // See comments for recordGradEvent(c10::Device device); + std::unordered_map gradReadyEvents_; + const c10::impl::VirtualGuardImpl impl_; + + // The autograd GraphTask for the backward pass on this node for this context. + std::shared_ptr graphTask_; + + // List of futures for RPCs initiated by this node to propagate gradients to + // other nodes. The distributed autograd engine on this node can return + // successfully only if all these futures are done and are successful. + std::vector> outStandingRpcs_; + + // Lock to protect concurrent modification of the context. + mutable std::mutex lock_; +}; + +using ContextPtr = std::shared_ptr; + +// This class stores a shared_ptr to a DistAutogradContext instance in a +// thread local variable. The instance is given by the call site. The class +// doesn't know the current context. It's just a util class. +class TORCH_API ThreadLocalDistAutogradContext { + public: + // Store 'new_context' to the thread local variable maintained by this class. + explicit ThreadLocalDistAutogradContext(ContextPtr&& new_context); + ~ThreadLocalDistAutogradContext(); + + // Retrieve the stored DistAutogradContext instance. + static ContextPtr getContextPtr(); + + private: + ContextPtr prev_context_ptr_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/engine/dist_engine.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/engine/dist_engine.h new file mode 100644 index 0000000000000000000000000000000000000000..aef84bb4113f439f667d3745a426266db14ccbaa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/engine/dist_engine.h @@ -0,0 +1,177 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// Forward declaration. +class BackwardPassCleanupGuard; + +// This is a singleton class responsible for running distributed backward +// passes. This engine relies heavily on the vanilla autograd engine and tries +// to reuse it as much as possible. This class is mostly responsible for the +// distributed aspects of autograd and tries to hook into the autograd engine +// where convenient. + +// Unlike the vanilla autograd engine, the distributed autograd engine +// accumulates the gradients in the appropriate DistAutogradContext. This avoids +// multiple trainer nodes stomping on each others gradients. +class TORCH_API DistEngine { + public: + // Retrieve the singleton instance. + static DistEngine& getInstance(); + + // Given a list of root variables, start the distributed backwards pass from + // these variables and accumulate all the gradients in the current autograd + // context on each node. This method is used to kickoff distributed autograd + // on a single node. + void execute( + int64_t context_id, + const torch::autograd::variable_list& roots, + bool retainGraph); + + // Given a send function to execute in the autograd engine, ensures we compute + // dependencies once for this node and enqueues the send function for execute + // in the engine. + // This method is used to kick off the autograd computation on a node when it + // receives gradients from the corresponding 'recv' method on another node. + // The gradients are accumulated in the provided autograd context. + c10::intrusive_ptr executeSendFunctionAsync( + const ContextPtr& autogradContext, + const std::shared_ptr& sendFunction, + bool retainGraph); + + // Number of backward passes currently running for the Distributed Engine. + size_t numBackwardPasses() const; + + // Returns key-value pairs consisting of useful debugging information related + // to distributed autograd. + std::unordered_map getDebugInfo() const; + + DistEngine(const DistEngine&) = delete; + DistEngine& operator=(const DistEngine&) = delete; + DistEngine(DistEngine&&) = delete; + DistEngine& operator=(DistEngine&&) = delete; + + private: + // Make sure this is a singleton. + DistEngine(); + ~DistEngine(); + + // Validates the input roots for the backward computations and retrieves the + // appropriate root edges and corresponding gradients. Populates root_edges + // with the appropriate gradient edges and grads with the gradients for each + // edge. + void validateRootsAndRetrieveEdges( + const torch::autograd::variable_list& roots, + torch::autograd::edge_list& rootEdges, + torch::autograd::variable_list& grads); + + // Given the autograd context, root edges and grads, we compute dependencies + // for the local node and fill out the provided GraphTask and GraphRoot with + // appropriate information for the local autograd engine. + // We also determine all leaf nodes(functions) in the graph and accumulate + // them in outputEdges. + void computeDependencies( + const ContextPtr& context, + const torch::autograd::edge_list& rootEdges, + const torch::autograd::variable_list& grads, + const std::shared_ptr& graphRoot, + torch::autograd::edge_list& outputEdges, + bool retainGraph); + + // Given a pre-populated GraphTask and a root node, compute the backward pass + // for the autograd graph until the graph task ready queue is empty. + // + // This method assumes that the appropriate GraphTask has already been + // initialized appropriately. It will construct a local ready queue to + // traverse the GraphTask instead of using the GraphTask embedded + // cpu_ready_queue, this is because dist engine might run the same GraphTask + // from different SendFunctions concurrently in different threads. The method + // will only mark the GraphTask as completed when it needs to, which means it + // might not mark as completed for every call as dist engine would like to + // keep the GraphTask alive when it not receives all gradients. + // + // When `incrementOutstandingTasks=false`, the function does not increment + // 'outstanding_tasks_' in the appropriate GraphTask. It is assumed we've + // already done this before hand for this task (to ensure we don't pre-mark + // this graph_task as completed). This is useful in the distributed autograd + // case where we need to increment 'outstanding_tasks_' first to indicate the + // local autograd engine the graph task is not completed until it receives the + // signals from other workers over the network. + // + // XXX: calling this function assumes that we will have NO GPU nodetasks be + // executed for the graph_task, the caller of this function need to ensure + // this otherwise there will be undefined behaviors. A correct way to fix this + // is to re-design the autograd engine so that GPU worker thread to behave the + // same as CPU caller thread, record the operation/thread for the device, and + // reuse it in backward. + // TODO: 1. Add assert in the dist engine to ensure no GPU NodeTasks during + // backward + // 2. properly setup the thread local ready queue to enable reentrant + // backwards + void execute_graph_task_until_ready_queue_empty( + torch::autograd::NodeTask&& node_task, + bool incrementOutstandingTasks = true); + + // Run the local autograd engine using the provided graphTask and graphRoot + // and accumulate the gradients part 'outputEdges' in the provided autograd + // context. + c10::intrusive_ptr runEngineAndAccumulateGradients( + const ContextPtr& autogradContext, + const std::shared_ptr& graphRoot, + const torch::autograd::edge_list& outputEdges, + bool incrementOutStandingTasks = true); + + // Run after the backward pass is done to appropriately cleanup structures. + void cleanupBackwardPass(const ContextPtr& autogradContext); + + // Global thread to execute CPU continuations. + void globalCpuThread( + const std::shared_ptr& ready_queue); + + // Set of autograd context_ids, which we have already initialized for + // distributed autograd on this node (e.g.: already computed dependencies) + std::unordered_set initializedContextIds_; + + mutable std::mutex initializedContextIdsLock_; + + // Reference to local autograd engine. + torch::autograd::Engine& engine_; + + // Ready queue used by the CPU thread in distributed engine. + // See Note [GPU to CPU continuations] + std::shared_ptr global_cpu_ready_queue_; + + // See Note [GPU to CPU continuations] + std::thread global_cpu_thread_; + + friend class BackwardPassCleanupGuard; +}; + +// Guard to clean up resources once the backward pass is done. +class BackwardPassCleanupGuard { + public: + explicit BackwardPassCleanupGuard(ContextPtr autogradContext) + : autogradContext_(std::move(autogradContext)) {} + + ~BackwardPassCleanupGuard() { + DistEngine::getInstance().cleanupBackwardPass(autogradContext_); + } + + private: + ContextPtr autogradContext_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/recvrpc_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/recvrpc_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..ec9959de9fc137a3da052c0be05e566ede4c6bdb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/recvrpc_backward.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// Forward declarations. +class DistAutogradContext; + +// As part of our distributed autograd implementation, whenever we receive an +// RPC from a node, we add a 'RecvRpcBackward' autograd function to the +// autograd graph. This is more or less a placeholder function that is used to +// pass gradients to the remote host during the backward pass. The inputs to the +// RPC function are the inputs to this autograd function. +class TORCH_API RecvRpcBackward : public torch::autograd::Node { + public: + explicit RecvRpcBackward( + const AutogradMetadata& autogradMetadata, + const std::shared_ptr& autogradContext, + rpc::worker_id_t fromWorkerId, + rpc::DeviceMap deviceMap); + + torch::autograd::variable_list apply( + torch::autograd::variable_list&& grads) override; + + private: + const AutogradMetadata autogradMetadata_; + + // Hold a weak reference to the autograd context to avoid circular + // dependencies with the context (since it holds a reference to + // RecvRpcBackward). + std::weak_ptr autogradContext_; + + // The worker id from which the RPC was received. During the backward pass, + // we need to propagate the gradients to this workerId. + rpc::worker_id_t fromWorkerId_; + + // Device mapping for tensors sent over RPC. + const rpc::DeviceMap deviceMap_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/sendrpc_backward.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/sendrpc_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..e9d651a206361595c8c5f82d67545f29811d8b51 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/sendrpc_backward.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::autograd { + +// As part of our distributed autograd implementation, whenever we send an RPC +// from one node to another, we add a 'SendRpcBackward' autograd function to the +// autograd graph. This is more or less a placeholder function that is used to +// kickoff the autograd engine on the current worker on the backward pass. The +// edges for this autograd function are the inputs to the RPC method. +// +// During the backward pass, this function is queued for execution in the +// autograd engine which eventually runs the rest of the autograd graph. +struct TORCH_API SendRpcBackward : public torch::autograd::Node { + public: + torch::autograd::variable_list apply( + torch::autograd::variable_list&& inputs) override; + + // SendRpcBackward is actually the root of an autograd graph on the local + // node. As a result, it doesn't receive any 'inputs', but rather the RPC + // framework passes gradients over to this function to kickoff local autograd + // computation. + void setGrads(const torch::autograd::variable_list& grads); + + // Retrieve the grads for the function. + const torch::autograd::variable_list& getGrads() const; + + private: + torch::autograd::variable_list grads_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/python_autograd.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/python_autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..2dd0604b70cc367ced7f549ec00841affbe4b7f9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/python_autograd.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::autograd { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/autograd_metadata.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/autograd_metadata.h new file mode 100644 index 0000000000000000000000000000000000000000..c6b35f8f5c9590e55195de8618c6acc6de15e719 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/autograd_metadata.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// This structure represents autograd metadata that we need to pass across +// different nodes when we call an RPC which needs autograd computation. +struct TORCH_API AutogradMetadata { + AutogradMetadata(int64_t autogradContextId, int64_t autogradMessageId); + + // autogradContextId_ is a globally unique integer that identifies a + // particular distributed autograd pass. + int64_t autogradContextId; + // autogradMessageId_ is a globally unique integer that identifies a pair + // of send/recv autograd functions. + int64_t autogradMessageId; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_req.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_req.h new file mode 100644 index 0000000000000000000000000000000000000000..94948652d8f95a90343351b180433f10db90b353 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_req.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::autograd { + +// Used to request other workers to clean up their autograd context. +class TORCH_API CleanupAutogradContextReq : public rpc::RpcCommandBase { + public: + explicit CleanupAutogradContextReq(int64_t context_id); + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Retrieve the context id we are cleaning up with this message. + int64_t getContextId(); + + private: + int64_t context_id_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_resp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..6b1733c5aa61bd1c39837e7b84bbb748a3c7bc77 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_resp.h @@ -0,0 +1,24 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// Empty response for CleanupAutogradContextReq. Send to acknowledge receipt of +// a CleanupAutogradContextReq. +class TORCH_API CleanupAutogradContextResp : public rpc::RpcCommandBase { + public: + CleanupAutogradContextResp() = default; + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_req.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_req.h new file mode 100644 index 0000000000000000000000000000000000000000..e56f32af955b8653fecf6dd92c06eddc76ab1c2a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_req.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// Used to propagate gradients from one node to another during a distributed +// backwards pass. This RPC call is invoked when we hit a `recv` autograd +// function during backward pass execution. +class TORCH_API PropagateGradientsReq : public rpc::RpcCommandBase { + public: + PropagateGradientsReq( + const AutogradMetadata& autogradMetadata, + std::vector grads, + bool retainGraph = false); + + const AutogradMetadata& getAutogradMetadata(); + + const std::vector& getGrads(); + + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Whether or not to retain the autograd graph. + bool retainGraph(); + + private: + AutogradMetadata autogradMetadata_; + std::vector grads_; + bool retainGraph_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_resp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..4e2fd092c76b3a36479d61e4376dd8b4e5da5135 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_resp.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// Response for the PropagateGradients call. Currently, this class is mostly +// just a placeholder and sends an empty message over the wire. The purpose of +// this RPC command is to indicate whether or not the PropagateGradientsReq call +// was successfully or not. +class TORCH_API PropagateGradientsResp : public rpc::RpcCommandBase { + public: + PropagateGradientsResp() = default; + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..66df8c943e9d3d2a9af59fa135ab584cc95ae8f6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.h @@ -0,0 +1,99 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::autograd { + +// Represents an RPC that includes autograd information. This class basically +// wraps another `RpcCommandBase` object which represents the actual RPC and has +// additional autograd information associated with that RPC. +class TORCH_API RpcWithAutograd final : public rpc::RpcCommandBase { + public: + // Used when we are sending an RPC over the wire. + RpcWithAutograd( + rpc::worker_id_t fromWorkerId, + rpc::MessageType messageType, + const AutogradMetadata& autogradMetadata, + c10::intrusive_ptr wrappedMessage, + rpc::DeviceMap deviceMap = {}); + + // Used when receiving an RPC over the wire. + RpcWithAutograd( + rpc::worker_id_t fromWorkerId, + rpc::MessageType messageType, + const AutogradMetadata& autogradMetadata, + std::unique_ptr wrappedRpc, + rpc::MessageType wrappedMessageType, + std::vector tensors, + rpc::DeviceMap deviceMap = {}); + + c10::intrusive_ptr toMessageImpl() && override; + + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Retrieves tensors as part of this RPC, which need to be considered for + // autograd computations. + std::vector& tensors(); + + const AutogradMetadata& autogradMetadata() const; + + RpcCommandBase& wrappedRpc(); + + void setWrappedRpc(std::unique_ptr wrappedRpc); + + std::unique_ptr moveWrappedRpc() &&; + + // Message type of the wrapped RPC. + rpc::MessageType wrappedMessageType() const; + + // Retrieve the worker id from which the RPC originated. + rpc::worker_id_t fromWorkerId() const; + + // Retrieve the device map. + const rpc::DeviceMap& deviceMap(); + + private: + // WorkerId from which this RPC originated. This is necessary for knowing + // which worker we need to contact during the backward pass. + rpc::worker_id_t fromWorkerId_; + + // Message type for this call. + rpc::MessageType messageType_; + + AutogradMetadata autogradMetadata_; + + // Since wrappedMessage_ is destructively constructed from wrappedRpc_, + // they are valid exclusively. They are used for different purpose. + // wrappedRpc_ is used while constructing receive rpcWithAutograd; + // wrappedMessage_ is used while constructing send rpcWithAutograd; + + // When receive rpcWithAutograd is constructed fromMessage, it is valid; + // When send rpcWithAutograd is constructed before toMessage, it is nullptr; + std::unique_ptr wrappedRpc_; + + // Serialized message representing wrappedRpc_. Used mostly as a cache to + // avoid serializing the request twice. + // When receive rpcWithAutograd is constructed fromMessage, it is nullptr; + // When send rpcWithAutograd is constructed before toMessage, it is valid; + c10::intrusive_ptr wrappedMessage_; + + // message type of the wrappedMessage, this is stored separately since + // wrappedMessage_ is not always guaranteed to be populated. + rpc::MessageType wrappedMessageType_; + + // Tensors part of the wrappedRpc that need to be considered for autograd. + std::vector tensors_; + + // Device mapping for tensors that are sent across an RPC to another node. + rpc::DeviceMap deviceMap_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_req.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_req.h new file mode 100644 index 0000000000000000000000000000000000000000..8bfc2764e9e172cc63d8325cc363ffd35589106e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_req.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +class TORCH_API RpcWithProfilingReq : public rpc::RpcCommandBase { + public: + // For sending RPCs, invoked when client is creating this RPC command. + RpcWithProfilingReq( + rpc::MessageType messageType, + c10::intrusive_ptr wrappedMessage, + torch::autograd::profiler::ProfilerConfig&& profilerConfig, + rpc::ProfilingId profilingKeyId); + + // For receiving an RPC + // Used in fromMessage. + RpcWithProfilingReq( + rpc::MessageType messageType, + std::unique_ptr wrappedRpc, + rpc::MessageType wrappedMessageType, + std::vector tensors, + torch::autograd::profiler::ProfilerConfig&& profilerConfig, + rpc::ProfilingId profilingKeyId); + + // Convert this RPC Command to a Message that can be sent over the wire. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Retrieve the profiling data that is associated with this command. + torch::autograd::profiler::ProfilerConfig getProfilingConfig() const; + // Retrieve the globally unique profiling ID corresponding to this command. + const rpc::ProfilingId& getProfilingId() const; + // Retrieve the original RPC which this ProfilingRPC wraps. + RpcCommandBase& wrappedRpc(); + // Destructively move the wrapped RPC. + std::unique_ptr moveWrappedRpc() &&; + // Message type of the wrapped RPC + rpc::MessageType wrappedMessageType() const; + void setWrappedRpc(std::unique_ptr wrappedRpc); + + private: + // message type + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::MessageType messageType_; + // wrapped message + c10::intrusive_ptr wrappedMessage_; + std::unique_ptr wrappedRpc_; + rpc::MessageType wrappedMessageType_; + std::vector tensors_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const torch::autograd::profiler::ProfilerConfig profilerConfig_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::ProfilingId profilingKeyId_; +}; +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_resp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..fb9db4ccb84e2f961a3829d2ec28dfec7fcb2136 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_resp.h @@ -0,0 +1,63 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::autograd { +class TORCH_API RpcWithProfilingResp : public rpc::RpcCommandBase { + public: + // For sending RPCs over the wire + RpcWithProfilingResp( + rpc::MessageType messageType, + c10::intrusive_ptr wrappedMessage, + std::vector profiledEvents, + rpc::ProfilingId profilingId); + + // For receiving RPCs. Used in from message when converting a message received + // over the wire. + RpcWithProfilingResp( + rpc::MessageType messageType, + std::unique_ptr wrappedRpc, + rpc::MessageType wrappedMessageType, + std::vector tensors, + std::vector profiledEvents, + rpc::ProfilingId profilingId); + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + // Retrieve remote Events + std::vector getProfiledEvents() const; + // Retrieve the globally unique profiling ID corresponding to this command. + const rpc::ProfilingId& getProfilingId() const; + // Retrieve the original RPC which this ProfilingRPC wraps. + RpcCommandBase& wrappedRpc(); + // Destructively move the wrapped RPC. + std::unique_ptr moveWrappedRpc() &&; + // Message type of the wrapped RPC + rpc::MessageType wrappedMessageType() const; + // Set the wrapped RPC for this RPC. + void setWrappedRpc(std::unique_ptr wrappedRpc); + + private: + // message type + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::MessageType messageType_; + // wrapped message + c10::intrusive_ptr wrappedMessage_; + std::unique_ptr wrappedRpc_; + rpc::MessageType wrappedMessageType_; + std::vector tensors_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::vector profiledEvents_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::ProfilingId profilingId_; +}; +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h new file mode 100644 index 0000000000000000000000000000000000000000..1cb78980aa6deeef29a2e918eaf87ed970b7a127 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::autograd { + +// Internal system RPC to invoke distributed backward pass on remote nodes when +// 'rref.backward()' is invoked. +class TORCH_API RRefBackwardReq : public rpc::RpcCommandBase { + public: + RRefBackwardReq( + const rpc::RRefId& rrefId, + int64_t autogradContextId, + bool retainGraph = false); + + const rpc::RRefId& getRRefId() const; + + int64_t getAutogradContextId() const; + + bool retainGraph() const; + + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int64_t autogradContextId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool retainGraph_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..c4bf412302ced62d07a5ca8a24675376f1ed2b68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// Response for the RRefBackwardReq. +class TORCH_API RRefBackwardResp : public rpc::RpcCommandBase { + public: + RRefBackwardResp() = default; + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/utils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..47903ceb5e9bf2491e8896cdc2fed920b03e9448 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/utils.h @@ -0,0 +1,61 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// This method is used to attach the 'send' autograd function to the autograd +// graph when we use RPC. This method creates a new 'send' autograd function +// and attaches the provided tensors as next_edges to the 'send' function. In +// addition to this, it also registers the send function in the provided +// autograd context. Finally, the RPC message is updated with appropriate +// autograd information for the recipient. +TORCH_API void addSendRpcBackward( + const ContextPtr& autogradContext, + const AutogradMetadata& autogradMetadata, + std::vector& tensors); + +// This method is used to attach the 'recv' autograd function to the autograd +// graph when we use RPC. This method creates a new 'recv' autograd function +// and attaches the provided tensors as inputs to the 'recv' function. It +// creates a new autograd context if needed and registers the 'recv' function +// with this context. +// +// Returns a pointer to the autograd context created. +TORCH_API ContextPtr addRecvRpcBackward( + const AutogradMetadata& autogradMetadata, + std::vector& tensors, + rpc::worker_id_t fromWorkerId, + const rpc::DeviceMap& deviceMap); + +// This method is a wrapper utility used internally to wrap autograd info +// and attach autograd function for each type of rpc call if it has valid +// context and tensors require grads or forceGradRecording is true, in this +// case, return RpcWithAutograd message; otherwise return original rpc message. +// NB: forceGradRecording is useful when the request does not contain any tensor +// but the corresponding response does. +TORCH_API c10::intrusive_ptr getMessageWithAutograd( + const rpc::worker_id_t dstId, + c10::intrusive_ptr wrappedRpcMsg, + rpc::MessageType msgType, + bool forceGradRecording = false, + const rpc::DeviceMap& deviceMap = {}); + +// Send message after autograd checking +TORCH_API c10::intrusive_ptr sendMessageWithAutograd( + rpc::RpcAgent& agent, + const rpc::WorkerInfo& dst, + c10::intrusive_ptr wrappedRpcMsg, + bool forceGradRecording = false, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + bool forceDisableProfiling = false); + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backend.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backend.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d8a979143f9119d48da9dac1a89a943ddf4a85f5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backend.hpp @@ -0,0 +1,535 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +constexpr auto kBackendDefaultTimeout = + std::chrono::milliseconds(30 * 60 * 1000); + +namespace c10d { + +enum class ErrorType { + SUCCESS = 0, + TIMEOUT = 1, + // e.g., NCCL error, etc + COMM_ERROR = 2, + // TODO, do we need to distinguish between remote timeout or remote COMM + // errors? + REMOTE_ERROR = 3 +}; + +class TORCH_API Backend : public torch::CustomClassHolder { + public: + // Backend Options is a base struct that defines the basic options + // when constructing a Backend. Each Backend subclass should + // extend this struct and define its options if it wants to provide more + // config options (beyond basic ones defined here) to end user. + struct TORCH_API Options : torch::CustomClassHolder { + explicit Options( + std::string backend, + std::chrono::milliseconds timeout = kBackendDefaultTimeout) + : timeout(timeout), backend(std::move(backend)) {} + ~Options() override = default; + + std::chrono::milliseconds timeout; + + // backend name + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string backend; + std::string group_name; + std::vector global_ranks_in_group; + }; + + explicit Backend(int rank, int size); + ~Backend() override = 0; + + int getRank() const { + return rank_; + } + + int getSize() const { + return size_; + } + + // Returns an unique opaque ID of this backend that can be used to correlate + // with its collectives. + int64_t getID() const { + return reinterpret_cast(this); + } + + virtual bool supportsSplitting() const { + return false; + } + + virtual bool supportsCoalescing() const { + return false; + } + + virtual bool supportsTimeEstimation() const { + return false; + } + + virtual bool supportsShrinking() const { + return false; + } + + // Shrink the backend by excluding specified ranks. Backends that support + // communicator shrinking should override this and return a new backend + // instance representing the shrunken group. Backends may use opts_override + // to supply backend-specific options for the new group. + virtual c10::intrusive_ptr shrink( + const std::vector& /*ranks_to_exclude*/, + int /*shrink_flags*/ = 0, + const c10::intrusive_ptr& /*opts_override*/ = nullptr) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support shrink")); + } + + virtual void setTimeout(std::chrono::milliseconds timeout) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support setting timeout")); + } + + virtual void startCoalescing() { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not implement startCoalescing")); + } + + virtual c10::intrusive_ptr endCoalescing() { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not implement endCoalescing")); + } + + // Subclasses must override this method to return the backend name + virtual const std::string getBackendName() const { + TORCH_INTERNAL_ASSERT(false, "getBackendName is not implemented."); + } + + // Subclasses must override this method to return the backend name + virtual c10::intrusive_ptr getBackendOptions() { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not implement getBackendOptions.")); + } + + virtual c10::intrusive_ptr broadcast( + std::vector& /* tensors */, + const BroadcastOptions& /* opts */ = BroadcastOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support broadcast")); + } + + virtual c10::intrusive_ptr allreduce( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support allreduce")); + } + + virtual c10::intrusive_ptr allreduce_sparse( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allreduce sparse")); + } + + virtual c10::intrusive_ptr allreduce_coalesced( + std::vector& /* tensors */, + const AllreduceCoalescedOptions& /* opts */ = + AllreduceCoalescedOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allreduce_coalesced")); + } + + virtual c10::intrusive_ptr reduce( + std::vector& /* tensors */, + const ReduceOptions& /* opts */ = ReduceOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support reduce")); + } + + virtual c10::intrusive_ptr allgather( + std::vector>& /* outputTensors */, + std::vector& /* inputTensors */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support allgather")); + } + + // Gathers a single tensor inputBuffer into a single buffer outputBuffer that + // is interpreted as a contiguous collection of size inputBuffer * WORLD_SIZE. + // For implementers of ProcessGroup API and advanced users only. + // Note: this function will be deprecated in near future. + virtual c10::intrusive_ptr _allgather_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support _allgather_base")); + } + + // This function is deprecated and will be moved out of Backend to comms: + // * do not add dependencies on this function, + // * do not implement it in your Backend, implement _allgather_base + // instead. + virtual c10::intrusive_ptr allgather_coalesced( + std::vector>& /* outputTensorLists */, + std::vector& /* inputTensors */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allgather_coalesced")); + } + + // This function is a coalesced version of `allgather_into_tensor` (currently + // still named as `_allgather_base`). Each tensor in the vector corresponds to + // an input/output of one `allgather_into_tensor` operation. + virtual c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& /* outputs */, + std::vector& /* inputs */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allgather_into_tensor_coalesced")); + } + + virtual c10::intrusive_ptr gather( + std::vector>& /* outputTensors */, + std::vector& /* inputTensors */, + const GatherOptions& /* opts */ = GatherOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support gather")); + } + + virtual c10::intrusive_ptr scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ScatterOptions& /* opts */ = ScatterOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support scatter")); + } + + virtual c10::intrusive_ptr reduce_scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ReduceScatterOptions& /* opts */ = ReduceScatterOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support reduce_scatter")); + } + + virtual c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + const ReduceScatterOptions& /* opts */ = ReduceScatterOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support _reduce_scatter_base")); + } + + // This function is a coalesced version of `reduce_scatter_tensor` (currently + // still named as `_reduce_scatter_base`). Each tensor in the vector + // corresponds to an input/output of one `reduce_scatter_tensor` operation. + virtual c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& /* outputs */, + std::vector& /* inputs */, + const ReduceScatterOptions& /* opts */ = ReduceScatterOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support reduce_scatter_tensor_coalesced")); + } + + virtual c10::intrusive_ptr alltoall_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + std::vector& /* outputSplitSizes */, + std::vector& /* inputSplitSizes */, + const AllToAllOptions& /* opts */ = AllToAllOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support alltoall_base")); + } + + virtual c10::intrusive_ptr alltoall( + std::vector& /* outputTensors */, + std::vector& /* inputTensors */, + const AllToAllOptions& opts = AllToAllOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support alltoall")); + } + + virtual void monitoredBarrier( + const BarrierOptions& /* unused */, + bool /* unused */ = false) { + auto backendName = getBackendName(); + TORCH_CHECK( + false, + c10::str( + "Backend ", + backendName, + " does not support monitoredBarrier, only GLOO supports monitored barrier.")); + } + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. Only implemented + // for GLOO and NCCL backends currently. + virtual void setSequenceNumberForGroup() { + auto backendName = getBackendName(); + TORCH_CHECK( + false, + c10::str( + "Backend ", + backendName, + " does not yet support sequence numbers.")); + } + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + virtual uint64_t getSequenceNumberForGroup() { + auto backendName = getBackendName(); + TORCH_CHECK( + false, + c10::str( + "Backend ", + backendName, + " does not yet support sequence numbers.")); + } + + virtual c10::intrusive_ptr send( + std::vector& /* tensors */, + int /* dstRank */, + int /* tag */) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support send")); + } + + virtual c10::intrusive_ptr recv( + std::vector& /* tensors */, + int /* srcRank */, + int /* tag */) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support recv")); + } + + virtual c10::intrusive_ptr recvAnysource( + std::vector& /* tensors */, + int /* tag */) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support recvAnysource")); + } + + virtual c10::intrusive_ptr barrier( + const BarrierOptions& /* opts */ = BarrierOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support barrier")); + } + + virtual void registerOnCompletionHook( + std::function)>&& hook) { + TORCH_CHECK( + false, + "Only ProcessGrouppNCCL supports onCompletion hook, but got ", + getBackendName(), + " backend."); + } + + virtual void waitForPendingWorks() { + TORCH_CHECK( + false, + "Only ProcessGrouppNCCL supports waitForPendingWorks, but got ", + getBackendName(), + " backend."); + } + + virtual void enableCollectivesTiming() { + TORCH_CHECK( + false, + "Backend ", + getBackendName(), + " is missing implementation of enableCollectivesTiming."); + } + + virtual c10::intrusive_ptr split( + const c10::intrusive_ptr& store, + const std::vector& ranks, + const c10::intrusive_ptr& opts) { + TORCH_CHECK( + false, + "Backend ", + getBackendName(), + " is missing implementation of split."); + } + + virtual c10::intrusive_ptr merge( + const c10::intrusive_ptr& store, + const c10::intrusive_ptr& opts, + const int& rank, + const int& size) { + TORCH_CHECK( + false, + "Backend ", + getBackendName(), + " is missing implementation of merge."); + } + + bool hasHooks() const { + return onCompletionHook_ != nullptr; + } + + // Do not call this directly, use ProcessGroup::setGroupName instead. + virtual void setGroupUid(const std::string& pg_uid) { + pg_uid_ = pg_uid; + } + + const std::string& getGroupUid() const { + return pg_uid_; + } + + void setGroupDesc(const std::string& desc) { + pg_desc_ = desc; + } + + const std::string& getGroupDesc() const { + return pg_desc_; + } + + // See similar functions in ProcessGroup.hpp for context. + std::optional getBoundDeviceId() const { + return bound_device_id_; + } + + // Perform an eager connect to the specified device if the backend supports + // it. + virtual void eagerConnectSingleDevice(at::Device device) { + // no-op in the default case; this is an optimization some + // backends may perform + } + + void setBoundDeviceId(std::optional device) { + if (device) { + TORCH_CHECK(device->has_index(), "setBoundDeviceId must have an index"); + } + bound_device_id_ = device; + } + + virtual ErrorType getError() { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support getError")); + } + + virtual std::shared_ptr getMemAllocator() { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support getMemAllocator")); + } + + // Allocate tensor (aten::empty) from backend's communication-optimized memory + // pool + virtual at::Tensor allocateTensor(long size, at::TensorOptions options = {}) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support allocateTensor")); + } + + // Returns true if backend supports tensor allocation + virtual bool supportsTensorAlloc(c10::DeviceIndex deviceIdx) { + // Change to true in concrete backend if supported + return false; + } + + // Aborts all pending operations and connections in the backend if the backend + // supports it. + virtual void abort() {} + + // Shutdown the backend if the backend supports it. This should be used for + // normal shutdown. + virtual void shutdown() {} + + protected: + // Implementations of this interface need to call this to setup + // appropriate logging etc. + void init(); + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int rank_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int size_; + // Debug level setting. It is parsed once when ProcessGroup is constructed and + // remains the same across use of this process group. + DebugLevel dist_debug_level_; + std::string pg_uid_; + std::string pg_desc_; + + std::function)> onCompletionHook_; + + std::optional bound_device_id_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backoff.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backoff.hpp new file mode 100644 index 0000000000000000000000000000000000000000..329dc1f97857e93886096a508bcde1b20c75146d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backoff.hpp @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace c10d { + +class TORCH_API Backoff { + public: + virtual ~Backoff() = default; + + virtual std::chrono::milliseconds nextBackoff() = 0; + virtual void reset() = 0; + + void sleepBackoff() { + std::this_thread::sleep_for(nextBackoff()); + } +}; + +class TORCH_API ExponentialBackoffWithJitter : public Backoff { + public: + ExponentialBackoffWithJitter(); + + std::chrono::milliseconds nextBackoff() override; + void reset() override; + + public: + std::chrono::milliseconds initialInterval{500}; + double randomizationFactor{0.5}; + double multiplier{1.5}; + std::chrono::milliseconds maxInterval{60000}; + + private: + std::mt19937 gen_; + std::chrono::milliseconds currentInterval_{0}; +}; + +class TORCH_API FixedBackoff : public Backoff { + public: + FixedBackoff(std::chrono::milliseconds interval); + + std::chrono::milliseconds nextBackoff() override; + void reset() override; + + private: + std::chrono::milliseconds interval_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp new file mode 100644 index 0000000000000000000000000000000000000000..453d2f4e64f83cf9466ce951e8be8a4f666a1393 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp @@ -0,0 +1,258 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10d { + +class FakeWork : public Work { + public: + int seq_id = -1; + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + return true; + } + + c10::intrusive_ptr getFuture() override { + auto fut = c10::make_intrusive(c10::NoneType::get()); + fut->markCompleted(); + return fut; + } +}; + +class FakeProcessGroup : public Backend { + public: + struct Options : Backend::Options { + explicit Options() : Backend::Options("fake") {} + + int fake_option = 0; + bool error_on_collective = false; + }; + + // Static factory method for official APIs + static c10::intrusive_ptr _create_internal( + int rank, + int size, + c10::intrusive_ptr options = c10::make_intrusive()) { + return c10::make_intrusive( + rank, size, std::move(options)); + } + + const std::string getBackendName() const override { + return "fake"; + } + + c10::intrusive_ptr getBackendOptions() override { + return c10::static_intrusive_pointer_cast(options_); + } + + c10::intrusive_ptr broadcast( + std::vector& /* tensors */, + const BroadcastOptions& /* opts */ = BroadcastOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allreduce( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allreduce_sparse( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allreduce_coalesced( + std::vector& /* tensors */, + const AllreduceCoalescedOptions& /* opts */ = + AllreduceCoalescedOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr reduce( + std::vector& /* tensors */, + const ReduceOptions& /* opts */ = ReduceOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + // NOTE [allgather on FakeProcessGroup] + // Assume each rank have the same input tensor so we just copy to the results + // since it's not a real allgather, we simply make this copying logic to let + // some simple validation works (i.e. calling allgather to see if each rank + // have the same tensor or not). + // + // NOTE: in general it's not good form to try to make FakeProcessGroup work + // with real data, but the reasoning here is that we want FakeProcessGroup to + // work with DeviceMesh's init code that have the data validation, which + // makes it worth the tradeoff. + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + for (auto& tensor : outputTensors[0]) { + tensor.copy_(inputTensors[0]); + } + return c10::make_intrusive(); + } + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + auto chunks = outputBuffer.chunk(size_); + for (auto& tensor : chunks) { + tensor.copy_(inputBuffer); + } + return c10::make_intrusive(); + } + + c10::intrusive_ptr allgather_coalesced( + std::vector>& /* outputTensorLists */, + std::vector& /* inputTensors */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + for (size_t i = 0; i < outputs.size(); ++i) { + auto chunks = outputs[i].chunk(size_); + for (auto& chunk : chunks) { + chunk.copy_(inputs[i]); + } + } + return c10::make_intrusive(); + } + + c10::intrusive_ptr gather( + std::vector>& /* outputTensors */, + std::vector& /* inputTensors */, + const GatherOptions& /* opts */ = GatherOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ScatterOptions& /* opts */ = ScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr reduce_scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ReduceScatterOptions& /* opts */ = + ReduceScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + const ReduceScatterOptions& /* opts */ = + ReduceScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& /* outputs */, + std::vector& /* inputs */, + const ReduceScatterOptions& /* opts */ = + ReduceScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr alltoall_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + std::vector& /* outputSplitSizes */, + std::vector& /* inputSplitSizes */, + const AllToAllOptions& /* opts */ = AllToAllOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr alltoall( + std::vector& /* outputTensors */, + std::vector& /* inputTensors */, + const AllToAllOptions& opts = AllToAllOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr send( + std::vector& /* tensors */, + int /* dstRank */, + int /* tag */) override { + return c10::make_intrusive(); + } + + c10::intrusive_ptr recv( + std::vector& /* tensors */, + int /* srcRank */, + int /* tag */) override { + return c10::make_intrusive(); + } + + c10::intrusive_ptr recvAnysource( + std::vector& /* tensors */, + int /* tag */) override { + return c10::make_intrusive(); + } + + void startCoalescing() override { + // No-op + } + + c10::intrusive_ptr endCoalescing(OpType /* optype */) { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr endCoalescing() override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr barrier( + const BarrierOptions& /* opts */ = BarrierOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + // Private constructor used by official APIs + FakeProcessGroup(int rank, int size, c10::intrusive_ptr options) + : Backend(rank, size), options_(std::move(options)) {} + c10::intrusive_ptr options_; + + private: + void checkCollectiveError() { + TORCH_CHECK( + !options_ || !options_->error_on_collective, + "FakeProcessGroup collective operation error (error_on_collective=true)"); + } +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FileStore.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FileStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7725d9856572a906d688992dd6eaa284d019887d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FileStore.hpp @@ -0,0 +1,72 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include + +namespace c10d { + +class TORCH_API FileStore : public Store { + public: + explicit FileStore(std::string path, int numWorkers); + + c10::intrusive_ptr clone() override; + + ~FileStore() override; + + void set(const std::string& key, const std::vector& value) override; + + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + int64_t add(const std::string& key, int64_t value) override; + + int64_t getNumKeys() override; + + bool deleteKey(const std::string& key) override; + + bool check(const std::vector& keys) override; + + void wait(const std::vector& keys) override; + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + // Returns the path used by the FileStore. + const std::string& getPath() const noexcept { + return path_; + } + + std::vector listKeys() override; + + protected: + int64_t addHelper(const std::string& key, int64_t i); + + std::string path_; + off_t pos_{0}; + + int numWorkers_; + const std::string cleanupKey_; + const std::string refCountKey_; + const std::string regularPrefix_; + const std::string deletePrefix_; + + std::unordered_map> cache_; + + std::mutex activeFileOpLock_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorder.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..87a2cce06f6c36d365988cd664464a6827b2f068 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorder.hpp @@ -0,0 +1,333 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace c10d { + +#define DEFINE_CONSTANT(name, value) \ + static c10::IValue name = value; \ + static std::string name##_str = value; +// Update whenever changing contents or formatting of the dump +// (minor when adding fields, major when changing existing fields) +// Also update both JSON and Pickle dumps to make use of the newly defined +// field(s). +DEFINE_CONSTANT(version_val, "2.10") +DEFINE_CONSTANT(entries_key, "entries") +DEFINE_CONSTANT(nccl_comm_key, "nccl_comm_state") +DEFINE_CONSTANT(comm_lib_version_key, "comm_lib_version") +DEFINE_CONSTANT(version_key, "version") +DEFINE_CONSTANT(pg_config_key, "pg_config") +DEFINE_CONSTANT(pg_status_key, "pg_status") +DEFINE_CONSTANT(record_id_key, "record_id") +DEFINE_CONSTANT(pg_id_key, "pg_id") +DEFINE_CONSTANT(pg_name_key, "process_group") +DEFINE_CONSTANT(collective_seq_id_key, "collective_seq_id") +DEFINE_CONSTANT(p2p_seq_id_key, "p2p_seq_id") +DEFINE_CONSTANT(is_p2p_key, "is_p2p") +DEFINE_CONSTANT(op_id_key, "op_id") +DEFINE_CONSTANT(profiling_name_key, "profiling_name") +DEFINE_CONSTANT(input_sizes_key, "input_sizes") +DEFINE_CONSTANT(input_dtypes_key, "input_dtypes") +DEFINE_CONSTANT(output_sizes_key, "output_sizes") +DEFINE_CONSTANT(output_dtypes_key, "output_dtypes") +DEFINE_CONSTANT(time_created_key, "time_created_ns") +DEFINE_CONSTANT(duration_key, "duration_ms") +DEFINE_CONSTANT(timeout_key, "timeout_ms") +DEFINE_CONSTANT(frames_key, "frames") +DEFINE_CONSTANT(state_key, "state") +DEFINE_CONSTANT(line_key, "line") +DEFINE_CONSTANT(name_key, "name") +DEFINE_CONSTANT(filename_key, "filename") +DEFINE_CONSTANT(retired_key, "retired") +DEFINE_CONSTANT(time_discovered_started_key, "time_discovered_started_ns") +DEFINE_CONSTANT(time_discovered_completed_key, "time_discovered_completed_ns") +DEFINE_CONSTANT(completed_state, "completed") +DEFINE_CONSTANT(scheduled_state, "scheduled") +DEFINE_CONSTANT(started_state, "started") +DEFINE_CONSTANT(thread_id_key, "thread_id") +DEFINE_CONSTANT(thread_name_key, "thread_name") +#undef DEFINE_CONSTANT + +// Write NCCL debug info to local disk or any storage users define. +// There are some constrains we set for the debug info writer: +// 1. The writer should only be registered once. +// 2. Once registered, users cannot change it including un-register. +// 3. It is recommended to register the customized writer in the trainer setup, +// If users don't register before calling launchAsyncDebugDump, then users +// lose the chance to register (and the default writer will be +// auto-registered). +class TORCH_API DebugInfoWriter { + public: + virtual ~DebugInfoWriter() = default; + virtual void write(const std::string& trace); + static DebugInfoWriter& getWriter(int rank); + static void registerWriter(std::unique_ptr writer); + virtual std::string getWriterTarget() { + return filename_; + } + + protected: + DebugInfoWriter( + const std::string& namePrefix, + int rank, + bool enableDynamicFilename = false) { + filename_ = c10::str(namePrefix, rank); + enable_dynamic_filename_ = enableDynamicFilename; + rank_ = rank; + } + std::string filename_; + int rank_; + bool enable_dynamic_filename_; + + private: + static std::unique_ptr writer_; + static std::atomic hasWriterRegistered_; +}; + +template +struct FlightRecorder { + static FlightRecorder* get() { + // intentionally leak on exit + // because this will hold python state that may get destructed + static FlightRecorder* instance = + new FlightRecorder(); + return instance; + } + FlightRecorder() { + max_entries_ = + getCvarInt({"TORCH_FR_BUFFER_SIZE", "TORCH_NCCL_TRACE_BUFFER_SIZE"}, 0); + capture_cpp_stack_ = getCvarBool( + {"TORCH_FR_CPP_STACK", "TORCH_NCCL_TRACE_CPP_STACK"}, false); + enabled_ = max_entries_ > 0; + reset_epoch_start_idx_[0] = 0; + } + struct Entry { + size_t id_; // incremented id in the trace buffer + // used to figure out where in the circular entries + // buffer this entry will be located to + // update state information + size_t reset_epoch_; // epoch when this entry was created + size_t pg_id_; + std::tuple pg_name_; // + + // collective_seq_id and p2p_seq_id refer to actual kernel launches (e.g. 1 + // per coalesced group). + // collective_seq_id only increments for true collective operations (over + // all ranks in the group). p2p_seq_id only increments over non-collective + // operations in the group. op_id refers to logical operations (e.g. one per + // op inside coalesced group) + size_t collective_seq_id_; + size_t p2p_seq_id_; + size_t op_id_; + std::string profiling_name_; + + std::shared_ptr traceback_; + // we borrow pointers to start_ and end_ so we can query the state + // on reporting. However, once the event is completed, the call + // to `complete` will clear these. + EventType *start_, *end_; + + // timestamp when the entry was created, likely close to the time the work + // was 'enqueued'- not necessarily started + c10::time_t time_created_; + + // configured timeout for this entry + c10::time_t timeout_ms_; + + // Is this a P2P event? + bool isP2P_; + + std::optional duration_; + + // timestamp when our CPU threads discovered that the kernel started. + // will always be _after_ it actually started, and can be very late + // if the watchdog thread got stuck on CUDA APIs. + std::optional time_discovered_started_; + + // timestamp when our CPU threads discovered that the kernel completed. + // will always be _after_ it actually completed, and can be the same time + // as the discovery of the start if the watchdog thread is stuck on CUDA + // APIs + std::optional time_discovered_completed_; + + // size information for input/output tensors + c10::SmallVector input_dims_; + std::vector input_dtypes_; + c10::SmallVector output_dims_; + std::vector output_dtypes_; + c10::SmallVector sizes_; // flattened from inputs, outputs + std::thread::id thread_id_; + std::string thread_name_; + bool retired_ = false; // is this work entry no longer in the workMetaList_? + // a retired but not completed event has timed out + + // Returns the traceback of current entry, in string form. + // Note: `getTraceback` invokes `torch::symbolize`, which may need to + // acquire the GIL. If you don't want to block the current thread or take + // the risk of a GIL deadlock, you can use an asynchronous calling mechanism + // like std::async. + TORCH_API std::string getTraceback(); + }; + + bool enabled_ = false; + bool capture_cpp_stack_ = false; + std::mutex mutex_; + std::vector entries_; + size_t max_entries_ = 0; + size_t next_ = 0; + size_t id_ = 0; + size_t reset_epoch_ = 0; + std::unordered_map + reset_epoch_start_idx_; // maps reset_epoch to the idx where it starts + std::map> all_pg_status_; + std::map, std::vector> + pg_name_to_ranks_; + std::string comm_lib_version_; + + struct TraceIdentifier { + std::optional id; + std::optional reset_epoch; + }; + + TraceIdentifier recordWithResetEnabled( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P); + + std::optional record( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P); + + TORCH_API void record_pg_ranks( + const std::tuple& pg_name, + std::vector ranks); + + void record_accelerator_version(const std::string comm_lib_version); + + void update_state(Entry& r); + + std::vector dump_entries(); + + // Returns the index in entries_ for the given id and reset_epoch. + // Caller must hold mutex_lock before calling this method. + size_t getIdxFromId(size_t id, size_t reset_epoch) const; + + // Returns the entry with the given id and reset_epoch, if it exists. + // Otherwise, returns std::nullopt. + TORCH_API std::optional getEntry( + std::optional id, + std::optional reset_epoch); + + TORCH_API std::optional getEntry(std::optional id); + + /* + Mark an Event as completed and free its events. + This is called by the watchdog thread, and is asynchronous from the + perspective of the main thread. + compute_duration defaults to true since retire_id is only called in the + watchdog thread, which is currently a place we call cuda APIs which may hang, + but care should be taken to avoid computing duration in any function that must + never hang. (timing must also be enabled for compute_duration - see + TORCH_NCCL_ENABLE_TIMING). + */ + TORCH_API void retire_id( + std::optional id, + std::optional reset_epoch, + bool compute_duration = true); + + TORCH_API void retire_id( + std::optional id, + bool compute_duration = true); + + TORCH_API void reset_all(); + + const c10::List getCollectiveTrace( + bool includeStacktraces, + bool onlyActive); + + // dump pg_entries + const c10::Dict getPgConfig(); + + const std::map> + getPgConfigJson(); + + // dump pg_status + const c10::Dict getPgStatus(); + + const std::map> + getPgStatusJson(); + + std::string dump_json( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool onlyActive); + + std::string dump( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool includeStackTraces, + bool onlyActive); +}; + +// Whether to include stack trace in the Flight Recorder trace (default true) +static std::vector TORCH_INCLUDE_STACK_TRACE = { + "TORCH_INCLUDE_STACK_TRACE"}; + +// Whether to include only active collectives in the Flight Recorder trace +// (default false) +static std::vector TORCH_INCLUDE_ONLY_ACTIVE = { + "TORCH_INCLUDE_ONLY_ACTIVE"}; + +// Dumps the fr traces and additional information about the Process +// Group. +TORCH_API std::string dump_fr_trace( + bool includeCollectives, + bool includeStackTraces, + bool onlyActive); + +// Dumps the fr traces and additional information about the Process +// Group in JSON formatted string. +// We don't include stack traces in JSON format as it is far too much data. +TORCH_API std::string dump_fr_trace_json( + bool includeCollectives, + bool onlyActive); +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0567b3076c3e534363aa0ad13c0a763c73ff5065 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp @@ -0,0 +1,641 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#include +#include + +#include + +namespace c10d { + +template +float getDurationFromEvent(EventType& start, EventType& end); + +// Returns the traceback of current entry, in string form. +// Note: `getTraceback` invokes `torch::symbolize`, which may need to acquire +// the GIL. If you don't want to block the current thread or take the risk of a +// GIL deadlock, you can use an asynchronous calling mechanism like std::async. +template +std::string FlightRecorder::Entry::getTraceback() { + torch::CapturedTraceback* traceback = traceback_.get(); + torch::SymbolizedTracebacks s_tbs = torch::symbolize({traceback}); + // We use 0 because we only have one traceback here. + const auto& s_tb = s_tbs.tracebacks.at(0); + std::stringstream oss; + for (auto idx : c10::irange(s_tb.size())) { + auto frame_id = s_tb[idx]; + const auto& frame = s_tbs.all_frames.at(frame_id); + oss << '#' << idx << ' ' << frame.funcname << " from " << frame.filename + << ':' << frame.lineno << '\n'; + } + /* Resulted format is like: + #0 all_reduce from pytorch/torch/distributed/distributed_c10d.py:2696 + #1 wrapper from pytorch/torch/distributed/c10d_logger.py:83 + #2 bar from /home/user/repro.py:15 + #3 foo from /home/user/repro.py:24 + #4 main from /home/user/repro.py:34 + #5 from /home/user/repro.py:40 + */ + return oss.str(); +} + +template +std::optional FlightRecorder::record( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P) { + auto result = recordWithResetEnabled( + pg_id, + pg_name, + collective_seq_id, + p2p_seq_id, + op_id, + std::move(profiling_name), + inputs, + outputs, + start, + end, + timeout_ms, + std::move(pg_status), + isP2P); + return result.id; +} + +template +typename FlightRecorder::TraceIdentifier FlightRecorder:: + recordWithResetEnabled( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P) { + if (!enabled_) { + return TraceIdentifier{std::nullopt, std::nullopt}; + } + if (all_pg_status_.find(pg_id) == all_pg_status_.end()) { + // Current pg_status is not in FR. + all_pg_status_[pg_id] = std::move(pg_status); + } + auto traceback = + torch::CapturedTraceback::gather(true, true, capture_cpp_stack_); + std::lock_guard guard(mutex_); + + TORCH_CHECK( + reset_epoch_start_idx_.find(reset_epoch_) != + reset_epoch_start_idx_.end()); + + auto te = Entry{ + id_, + reset_epoch_, + pg_id, + pg_name, + collective_seq_id, + p2p_seq_id, + op_id, + std::move(profiling_name), + std::move(traceback), + start, + end, + c10::getTime(), + timeout_ms.count(), + isP2P, + std::nullopt, + std::nullopt, + std::nullopt, + {}, + {}, + {}, + {}, + {}, + std::this_thread::get_id(), + c10::getThreadName(), + false}; + + for (const auto& input : inputs) { + c10::IntArrayRef sizes = input.sizes(); + te.input_dtypes_.push_back(input.dtype().toScalarType()); + te.input_dims_.push_back(static_cast(sizes.size())); + te.sizes_.insert(te.sizes_.end(), sizes.begin(), sizes.end()); + } + + for (const auto& output : outputs) { + c10::IntArrayRef sizes = output.sizes(); + te.output_dtypes_.push_back(output.dtype().toScalarType()); + te.output_dims_.push_back(static_cast(sizes.size())); + te.sizes_.insert(te.sizes_.end(), sizes.begin(), sizes.end()); + } + + const auto next = next_++; + + if (entries_.size() < max_entries_) { + entries_.emplace_back(std::move(te)); + } else { + entries_[next] = std::move(te); + } + + if (next_ == max_entries_) { + next_ = 0; + } + + const auto id = id_++; + return TraceIdentifier{id, reset_epoch_}; +} + +template +void FlightRecorder::record_pg_ranks( + const std::tuple& pg_name, + std::vector ranks) { + if (!enabled_) { + return; + } + std::lock_guard guard(mutex_); + pg_name_to_ranks_[pg_name] = std::move(ranks); +} + +template +void FlightRecorder::record_accelerator_version( + const std::string comm_lib_version) { + if (!enabled_) { + return; + } + std::lock_guard guard(mutex_); + comm_lib_version_ = std::move(comm_lib_version); +} + +template +void FlightRecorder::update_state(Entry& r) { + try { + if (r.start_ != nullptr) { + bool started = r.start_->query(); + if (started && !r.time_discovered_started_) { + r.time_discovered_started_ = c10::getTime(); + } + } + if (r.end_ != nullptr) { + bool completed = r.end_->query(); + if (completed && !r.time_discovered_completed_) { + r.time_discovered_completed_ = c10::getTime(); + } + } + } catch (std::exception& e) { + LOG(ERROR) << "Failed to update state for entry " << r.id_ << ": " + << r.profiling_name_ << " with error: " << e.what(); + } +} + +template +std::vector::Entry> FlightRecorder< + EventType>::dump_entries() { + std::vector result; + { + std::lock_guard guard(mutex_); + // Filter entries during insertion - only keep entries from current epoch + auto filter = [this](const Entry& e) { + return e.reset_epoch_ == reset_epoch_; + }; + std::copy_if( + entries_.begin() + static_cast(next_), + entries_.end(), + std::back_inserter(result), + filter); + std::copy_if( + entries_.begin(), + entries_.begin() + static_cast(next_), + std::back_inserter(result), + filter); + } + // query any remaining events + for (auto& r : result) { + update_state(r); + r.start_ = r.end_ = nullptr; + } + return result; +} + +template +// Returns the index in entries_ for the given id and reset_epoch. +// Caller must hold mutex_lock before calling this method. +size_t FlightRecorder::getIdxFromId(size_t id, size_t reset_epoch) + const { + // Look up the starting idx for the given reset epoch + auto it = reset_epoch_start_idx_.find(reset_epoch); + TORCH_CHECK(it != reset_epoch_start_idx_.end()); + // Calculate idx based on where the epoch started + return (it->second + id) % max_entries_; +} + +template +// Returns the entry with the given id and reset_epoch, if it exists. Otherwise, +// returns std::nullopt. +std::optional::Entry> FlightRecorder< + EventType>:: + getEntry(std::optional id, std::optional reset_epoch) { + if (!enabled_ || !id || !reset_epoch) { + return std::nullopt; + } + + std::unique_lock guard(mutex_); + Entry entry = entries_.at(getIdxFromId(*id, *reset_epoch)); + if (entry.id_ == *id && entry.reset_epoch_ == *reset_epoch) { + return entry; + } + return std::nullopt; +} + +template +std::optional::Entry> FlightRecorder< + EventType>::getEntry(std::optional id) { + return getEntry(id, 0); +} + +template +void FlightRecorder::retire_id( + std::optional id, + std::optional reset_epoch, + bool compute_duration) { + if (!enabled_ || !id || !reset_epoch) { + return; + } + + bool can_compute_duration = false; + EventType* startEvent = nullptr; + EventType* endEvent = nullptr; + std::optional duration = std::nullopt; + + std::unique_lock guard(mutex_); + + Entry* entry = &entries_.at(getIdxFromId(*id, *reset_epoch)); + if (entry->id_ == *id && entry->reset_epoch_ == *reset_epoch) { + update_state(*entry); + + if (compute_duration) { + can_compute_duration = entry->time_discovered_completed_.has_value() && + entry->start_ && entry->end_; + startEvent = entry->start_; + endEvent = entry->end_; + } + entry->retired_ = true; + entry->start_ = entry->end_ = nullptr; + } + + if (can_compute_duration) { + // Compute duration without without holding the lock, because + // cudaEventDuration() can hang, and we need to acquire the lock before we + // can dump(), which we never want to block. + guard.unlock(); + duration = getDurationFromEvent(*startEvent, *endEvent); + guard.lock(); + + // Refresh the entry pointer, see if the entry has been overwritten + entry = &entries_.at(getIdxFromId(*id, *reset_epoch)); + if (!(entry->id_ == *id && entry->reset_epoch_ == *reset_epoch)) { + LOG(INFO) << "retire_id abandoned for id " << *id + << ", event was overwritten while waiting to compute duration."; + return; + } + if (duration.has_value()) { + entry->duration_ = duration; + } + } +} + +template +void FlightRecorder::retire_id( + std::optional id, + bool compute_duration) { + retire_id(id, 0, compute_duration); +} + +template +void FlightRecorder::reset_all() { + std::lock_guard guard(mutex_); + if (!entries_.empty()) { + // Soft delete: increment epoch to mark all existing entries as old + // Store where the new epoch starts in the circular buffer + reset_epoch_++; + reset_epoch_start_idx_[reset_epoch_] = next_; + id_ = 0; + } +} + +template +const c10::List FlightRecorder::getCollectiveTrace( + bool includeStacktraces, + bool onlyActive) { + auto entries = new_list(); + // Entries are returned in the order they were recorded + auto result = dump_entries(); + std::vector tracebacks; + torch::SymbolizedTracebacks stracebacks; + std::vector all_frames; + if (includeStacktraces) { + for (auto& e : result) { + tracebacks.push_back(e.traceback_.get()); + } + stracebacks = torch::symbolize(tracebacks); + for (const auto& f : stracebacks.all_frames) { + auto d = new_dict(); + d.insert(name_key, f.funcname); + d.insert(filename_key, f.filename); + d.insert(line_key, int64_t(f.lineno)); + all_frames.emplace_back(std::move(d)); + } + } + for (auto i : c10::irange(result.size())) { + auto dict = new_dict(); + auto& e = result.at(i); + // Skip completed events + if (onlyActive && e.time_discovered_completed_.has_value()) { + continue; + } + if (includeStacktraces) { + auto& tb = stracebacks.tracebacks.at(i); + auto frames = new_list(); + for (auto frame : tb) { + frames.push_back(all_frames.at(frame)); + } + dict.insert(frames_key, frames); + } + + dict.insert(record_id_key, int64_t(e.id_)); + dict.insert(pg_id_key, int64_t(e.pg_id_)); + dict.insert(pg_name_key, e.pg_name_); + dict.insert(thread_name_key, e.thread_name_); + dict.insert(thread_id_key, c10::str(e.thread_id_)); + dict.insert(collective_seq_id_key, int64_t(e.collective_seq_id_)); + dict.insert(p2p_seq_id_key, int64_t(e.p2p_seq_id_)); + dict.insert(op_id_key, int64_t(e.op_id_)); + dict.insert(profiling_name_key, e.profiling_name_); + dict.insert(time_created_key, int64_t(e.time_created_)); + if (e.duration_) { + dict.insert(duration_key, *e.duration_); + } + + auto it = e.sizes_.begin(); + auto read_sizes = [&](const c10::SmallVector& dims) { + auto sizes = new_list(); + for (auto dim : dims) { + auto arg_sizes = new_list(); + for ([[maybe_unused]] auto i : c10::irange(dim)) { + arg_sizes.push_back(*it++); + } + sizes.push_back(arg_sizes); + } + return sizes; + }; + + dict.insert(input_sizes_key, read_sizes(e.input_dims_)); + std::vector input_dtypes_strs; + input_dtypes_strs.reserve(e.input_dtypes_.size()); + for (const auto& input_dtype : e.input_dtypes_) { + input_dtypes_strs.emplace_back(c10::toString(input_dtype)); + } + dict.insert(input_dtypes_key, input_dtypes_strs); + dict.insert(output_sizes_key, read_sizes(e.output_dims_)); + std::vector output_dtypes_strs; + output_dtypes_strs.reserve(e.output_dtypes_.size()); + for (const auto& output_dtype : e.output_dtypes_) { + output_dtypes_strs.emplace_back(c10::toString(output_dtype)); + } + dict.insert(output_dtypes_key, output_dtypes_strs); + if (e.time_discovered_completed_.has_value()) { + dict.insert(state_key, completed_state); + } else if (e.time_discovered_started_.has_value()) { + dict.insert(state_key, started_state); + } else { + dict.insert(state_key, scheduled_state); + } + + dict.insert( + time_discovered_started_key, + e.time_discovered_started_.has_value() + ? int64_t(*e.time_discovered_started_) + : c10::IValue()); + dict.insert( + time_discovered_completed_key, + e.time_discovered_completed_.has_value() + ? int64_t(*e.time_discovered_completed_) + : c10::IValue()); + dict.insert(retired_key, e.retired_); + dict.insert(timeout_key, e.timeout_ms_); + dict.insert(is_p2p_key, e.isP2P_); + + entries.push_back(dict); + } + return entries; +} + +template +const c10::Dict FlightRecorder< + EventType>::getPgConfig() { + auto pg_config = new_dict(); + for (const auto& [pg_name, ranks] : pg_name_to_ranks_) { + auto pg_info = new_dict(); + pg_info.insert("name", std::get<0>(pg_name)); + pg_info.insert("desc", std::get<1>(pg_name)); + pg_info.insert("ranks", ranks_str(ranks)); + pg_config.insert(std::get<0>(pg_name), pg_info); + } + return pg_config; +} + +template +const std::map> FlightRecorder< + EventType>::getPgConfigJson() { + std::map> result; + for (const auto& [pg_name, ranks] : pg_name_to_ranks_) { + auto pg_info = std::map(); + pg_info["name"] = std::get<0>(pg_name); + pg_info["desc"] = std::get<1>(pg_name); + pg_info["ranks"] = ranks_str(ranks); + result.emplace(std::get<0>(pg_name), pg_info); + } + return result; +} + +template +const c10::Dict FlightRecorder< + EventType>::getPgStatus() { + auto all_pg_status = new_dict(); + for (const auto& [pg_id, status] : all_pg_status_) { + auto pg_status = new_dict(); + pg_status.insert("last_enqueued_collective", status->lastEnqueuedSeq); + pg_status.insert("last_started_collective", status->lastStartedSeq); + pg_status.insert("last_completed_collective", status->lastCompletedSeq); + all_pg_status.insert(std::to_string(pg_id), pg_status); + } + return all_pg_status; +} + +template +const std::map> FlightRecorder< + EventType>::getPgStatusJson() { + std::map> result; + for (const auto& [pg_id, status] : all_pg_status_) { + auto pg_status = std::map(); + pg_status["last_enqueued_collective"] = + std::to_string(status->lastEnqueuedSeq); + pg_status["last_started_collective"] = + std::to_string(status->lastStartedSeq); + pg_status["last_completed_collective"] = + std::to_string(status->lastCompletedSeq); + result[std::to_string(pg_id)] = pg_status; + } + return result; +} + +using json = nlohmann::json; +template +std::string FlightRecorder::dump_json( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool onlyActive) { + json result; + result[version_key_str] = version_val_str; + result[comm_lib_version_key_str] = comm_lib_version_; + result[pg_config_key_str] = getPgConfigJson(); + result[pg_status_key_str] = getPgStatusJson(); + + // collective trace + if (includeCollectives) { + std::list entries; + for (auto& e : dump_entries()) { + json j; + if (onlyActive && e.time_discovered_completed_.has_value()) { + continue; + } + j[record_id_key_str] = int64_t(e.id_); + j[pg_id_key_str] = int64_t(e.pg_id_); + j[pg_name_key_str] = e.pg_name_; + j[thread_name_key_str] = e.thread_name_; + j[thread_id_key_str] = c10::str(e.thread_id_); + j[collective_seq_id_key_str] = int64_t(e.collective_seq_id_); + j[p2p_seq_id_key_str] = int64_t(e.p2p_seq_id_); + j[op_id_key_str] = int64_t(e.op_id_); + j[profiling_name_key_str] = e.profiling_name_; + j[time_created_key_str] = int64_t(e.time_created_); + if (e.duration_) { + j[duration_key_str] = *e.duration_; + } + auto it = e.sizes_.begin(); + auto read_sizes = [&](const c10::SmallVector& dims) { + auto sizes = std::list>(); + for (auto dim : dims) { + auto arg_sizes = std::list(); + for (auto i : c10::irange(dim)) { + (void)i; + arg_sizes.push_back(*it++); + } + sizes.push_back(arg_sizes); + } + return sizes; + }; + j[input_sizes_key_str] = read_sizes(e.input_dims_); + std::vector input_dtypes_strs; + input_dtypes_strs.reserve(e.input_dtypes_.size()); + for (const auto& input_dtype : e.input_dtypes_) { + input_dtypes_strs.emplace_back(c10::toString(input_dtype)); + } + j[input_dtypes_key_str] = input_dtypes_strs; + j[output_sizes_key_str] = read_sizes(e.output_dims_); + std::vector output_dtypes_strs; + output_dtypes_strs.reserve(e.output_dtypes_.size()); + for (const auto& output_dtype : e.output_dtypes_) { + output_dtypes_strs.emplace_back(c10::toString(output_dtype)); + } + j[output_dtypes_key_str] = output_dtypes_strs; + if (e.time_discovered_completed_.has_value()) { + j[state_key_str] = completed_state_str; + } else if (e.time_discovered_started_.has_value()) { + j[state_key_str] = started_state_str; + } else { + j[state_key_str] = scheduled_state_str; + } + j[time_discovered_started_key_str] = + e.time_discovered_started_.has_value() + ? int64_t(*e.time_discovered_started_) + : 0; + j[time_discovered_completed_key_str] = + e.time_discovered_completed_.has_value() + ? int64_t(*e.time_discovered_completed_) + : 0; + j[retired_key_str] = e.retired_; + j[timeout_key_str] = e.timeout_ms_; + j[is_p2p_key_str] = e.isP2P_; + entries.emplace_back(j); + } + + if (!entries.empty()) { + result[entries_key_str] = entries; + } + } + + if (extraDumpMap.has_value()) { + result[nccl_comm_key_str] = extraDumpMap.value(); + } + return result.dump(); +} + +template +std::string FlightRecorder::dump( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool includeStackTraces, + bool onlyActive) { + STATIC_SCOPED_WAIT_COUNTER(pytorch.wait_counter.FlightRecorder__dump); + auto result = new_dict(); + // common values + result.insert(version_key, version_val); + result.insert(pg_config_key, getPgConfig()); + result.insert(comm_lib_version_key_str, comm_lib_version_); + result.insert(pg_status_key, getPgStatus()); + + // collective trace + if (includeCollectives) { + result.insert( + entries_key, getCollectiveTrace(includeStackTraces, onlyActive)); + } + + // convert extraDumpMap into a dictionary + auto per_comm_dict = new_dict(); + if (extraDumpMap.has_value()) { + for (const auto& [ncclId, ncclDump] : extraDumpMap.value()) { + auto inner_dict = new_dict(); + for (const auto& [key, value] : ncclDump) { + inner_dict.insert(key, value); + } + per_comm_dict.insert(ncclId, inner_dict); + } + } + if (!per_comm_dict.empty()) { + result.insert(nccl_comm_key, per_comm_dict); + } + return pickle_str(result); +} +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Functional.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Functional.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b171a3ac776f56fd1ff7d6e74c5449449ced9cec --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Functional.hpp @@ -0,0 +1,90 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +C10_EXPORT at::Tensor& all_reduce_( + at::Tensor& input, + std::string reduce_op, + std::string group_name); + +C10_EXPORT at::Tensor all_reduce( + const at::Tensor& input, + std::string reduce_op, + std::string group_name); + +C10_EXPORT std::vector all_reduce_coalesced_( + std::vector inputs, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string reduce_op, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT std::vector all_reduce_coalesced( + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::vector inputs, + std::string reduce_op, + std::string group_name); + +C10_EXPORT std::vector all_gather_into_tensor_coalesced( + std::vector inputs, + int64_t group_size, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT at::Tensor all_gather_into_tensor( + const at::Tensor& input, + int64_t group_size, + std::string group_name); + +C10_EXPORT at::Tensor& all_gather_into_tensor_out( + at::Tensor& input, + int64_t group_size, + const std::string& group_name, + at::Tensor& output); + +C10_EXPORT std::vector reduce_scatter_tensor_coalesced( + std::vector inputs, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string reduce_op, + int64_t group_size, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT at::Tensor reduce_scatter_tensor( + const at::Tensor& input, + std::string reduce_op, + int64_t group_size, + std::string group_name); + +C10_EXPORT at::Tensor reduce_scatter_tensor_out( + const at::Tensor& input, + std::string reduce_op, + int64_t group_size, + std::string group_name, + at::Tensor& output); + +C10_EXPORT at::Tensor all_to_all_single( + const at::Tensor& input, + at::SymIntArrayRef output_split_sizes, + at::SymIntArrayRef input_split_sizes, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT at::Tensor& broadcast_( + at::Tensor& input, + int64_t src, + std::string group_name); + +C10_EXPORT at::Tensor broadcast( + const at::Tensor& input, + int64_t src, + std::string group_name); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9770bbe9f3822bc68fa8c3344e92c17b95004dff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include + +#include +#include +#include + +namespace c10d { + +class TORCH_API GlooDeviceFactory { + public: + // Create new device instance for specific interface. + static std::shared_ptr<::gloo::transport::Device> makeDeviceForInterface( + const std::string& interface, + bool lazyInit); + + // Create new device instance for specific hostname or address. + static std::shared_ptr<::gloo::transport::Device> makeDeviceForHostname( + const std::string& hostname, + bool lazyInit); +}; + +TORCH_DECLARE_SHARED_REGISTRY( + GlooDeviceRegistry, + ::gloo::transport::Device, + const std::string&, /* interface */ + const std::string&, /* hostname */ + bool /* lazyInit */); + +} // namespace c10d + +#endif // USE_C10D_GLOO + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GroupRegistry.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GroupRegistry.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4df50c8b39e5fbfc27e74477651d865a5d3dc06d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GroupRegistry.hpp @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +C10_EXPORT void set_thread_isolation_mode(bool enable); + +bool get_thread_isolation_mode(); + +C10_EXPORT void register_process_group( + const std::string& group_name, + const c10::intrusive_ptr& group); + +C10_EXPORT c10::intrusive_ptr resolve_process_group( + const std::string& group_name); + +C10_EXPORT void unregister_process_group(const std::string& group_name); + +C10_EXPORT void unregister_all_process_groups(); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/HashStore.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/HashStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e9b6186ef3bbf413ade67ddc83b764c43e655fa6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/HashStore.hpp @@ -0,0 +1,86 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace c10d { + +class TORCH_API HashStore : public Store { + public: + c10::intrusive_ptr clone() override; + + ~HashStore() override = default; + + void set(const std::string& key, const std::vector& data) override; + + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + void wait(const std::vector& keys) override { + wait(keys, timeout_); + } + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + int64_t add(const std::string& key, int64_t value) override; + + int64_t getNumKeys() override; + + bool check(const std::vector& keys) override; + + bool deleteKey(const std::string& key) override; + + void append(const std::string& key, const std::vector& value) + override; + + std::vector> multiGet( + const std::vector& keys) override; + + void multiSet( + const std::vector& keys, + const std::vector>& values) override; + + // Returns true if this store support append, multiGet and multiSet + bool hasExtendedApi() const override; + + void queuePush(const std::string& key, const std::vector& value) + override; + + std::vector queuePop(const std::string& key, bool block) override; + + int64_t queueLen(const std::string& key) override; + + std::vector listKeys() override; + + protected: + bool checkLocked( + const std::unique_lock& lock, + const std::vector& keys); + + void waitLocked( + std::unique_lock& lock, + const std::vector& keys, + const std::chrono::milliseconds& timeout); + + protected: + std::unordered_map> map_; + std::unordered_map>> queues_; + std::mutex m_; + std::condition_variable cv_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NCCLUtils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NCCLUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..87417b52ae59481ef89a4cfb2c9c377d1941eae4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NCCLUtils.hpp @@ -0,0 +1,448 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_NCCL + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +constexpr int64_t kCommInitBusyWaitMillis = 2; + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 14, 0) +#define NCCL_HAS_COMM_NONBLOCKING +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 18, 0) +#define NCCL_HAS_COMM_SPLIT +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 23, 0) +#define NCCL_HAS_INIT_RANK_SCALABLE +#endif + +// ncclGetLastError() is enabled only for NCCL versions 2.13+ +// ncclRemoteError only exists in NCCL versions 2.13+ +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 13, 0) +#define ENABLE_NCCL_GET_LAST_ERROR +#define NCCL_REMOTE_ERROR +#endif + +static_assert( + NCCL_VERSION_CODE >= NCCL_VERSION(2, 7, 0), + "NCCL version must be 2.7 or later"); +// The following macros represent features supported prior to NCCL 2.7, +// therefore we can define them unconditionally, given the static_assert above. +// TODO: remove these macros from code. +#define ENABLE_NCCL_ERROR_CHECKING +#define ENABLE_NCCL_P2P_SUPPORT +// End of macros for NCCL 2.7 and below. + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 11, 0) +#define ENABLE_NCCL_PREMUL_SUM_SUPPORT +#endif + +// Note: the first version that supports ncclConfig_t is 2.14. Here we +// fast-forward the version requirement to 2.17 where ncclConfig_t has CTA and +// CGA fields because they have already been pybinded out. +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 17, 0) +#define NCCL_HAS_CONFIG +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) +#define NCCL_HAS_COMM_REGISTER +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_COMM_WINDOW_REGISTER +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) +#define NCCL_HAS_MEM_ALLOC +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 26, 0) +#define NCCL_HAS_QOS +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 24, 0) +#define NCCL_SUPPORTS_FP8 +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_COLLNET +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_CTA_POLICY +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_NVLS_CTAS +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_COMM_SHRINK +#endif + +// Macro to throw on a non-successful NCCL return value. +#define C10D_NCCL_CHECK(cmd, failureReason) \ + do { \ + ncclResult_t result = cmd; \ + if (result != ncclSuccess) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(result) + \ + "\n" + getNcclErrorDetailStr(result, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Macro to throw on a non-successful NCCL return value for NONBLOCKING calls. +#define C10D_NCCL_CHECK_NONBLOCKING(cmd, failureReason) \ + do { \ + ncclResult_t result = cmd; \ + if (result != ncclSuccess && result != ncclInProgress) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(result) + \ + "\n" + getNcclErrorDetailStr(result, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Error out if (current time - startTime) is greater than timeout (sec). +#define C10D_CHECK_TIMEOUT(startTime, timeout) \ + do { \ + auto currentTime = std::chrono::steady_clock::now(); \ + auto timeElapsed = std::chrono::duration_cast( \ + currentTime - startTime) \ + .count(); \ + if (timeElapsed > timeout) { \ + std::string err = "NCCL timeout in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Macro to throw on a non-successful NCCL return value, non-blocking. +// Thread-safe: uses NCCLComm wrapper's getAsyncError() which acquires mutex +// before calling ncclCommGetAsyncError to prevent race conditions between +// watchdog and main threads. +#define C10D_NCCL_CHECK_TIMEOUT_BASE( \ + cmd, commWrapper, failureReason, yield_fn) \ + do { \ + ncclResult_t result = cmd; \ + auto startTimepoint = std::chrono::steady_clock::now(); \ + auto timeout = nccl_nonblocking_timeout(); \ + while (result == ncclInProgress) { \ + C10D_CHECK_TIMEOUT(startTimepoint, timeout); \ + yield_fn; \ + commWrapper->getAsyncError(&result); \ + } \ + if (result != ncclSuccess) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(result) + \ + "\n" + getNcclErrorDetailStr(result, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Sleep for kCommInitBusyWaitMillis milliseconds. +#define C10D_SCHED_SLEEP() \ + std::this_thread::sleep_for( \ + std::chrono::milliseconds(kCommInitBusyWaitMillis)) + +// Macro to throw exception on a non-successful NCCL return value or timeout. +// This macro uses sched_yield() to yield the CPU. +// Thus suitable for NCCL calls that would quickly turn ncclSuccess, e.g. +// collectives. +#define C10D_NCCL_CHECK_TIMEOUT(cmd, commWrapper, failureReason) \ + C10D_NCCL_CHECK_TIMEOUT_BASE(cmd, commWrapper, failureReason, sched_yield()) + +// Macro to throw exception on a non-successful NCCL return value or timeout. +// This macro uses sleep to yield the CPU. +// Thus suitable for NCCL calls that would take longer to turn ncclSuccess, e.g. +// ncclCommInitRankConfig, ncclCommFinalize, etc. +#define C10D_NCCL_CHECK_TIMEOUT_SLEEP(cmd, commWrapper, failureReason) \ + C10D_NCCL_CHECK_TIMEOUT_BASE( \ + cmd, commWrapper, failureReason, C10D_SCHED_SLEEP()) + +#define C10D_NCCL_CHECK_TIMEOUT_GROUPEND(cmd, comm, failureReason) \ + do { \ + ncclResult_t state = cmd; \ + auto startTimepoint = std::chrono::steady_clock::now(); \ + auto timeout = nccl_nonblocking_timeout(); \ + if (state == ncclInProgress) { \ + do { \ + C10D_CHECK_TIMEOUT(startTimepoint, timeout); \ + sched_yield(); \ + comm->getAsyncError(&state); \ + } while (state == ncclInProgress); \ + } \ + if (state != ncclSuccess) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(state) + \ + "\n" + getNcclErrorDetailStr(state, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Macro to print and abort on a non-successful NCCL return value. +#define C10D_NCCL_ASSERT(cmd) \ + do { \ + ncclResult_t result = cmd; \ + if (result != ncclSuccess) { \ + std::string err = ncclGetErrorWithVersion(result); \ + fprintf( \ + stderr, \ + "NCCL error in: %s:%d, %s\n", \ + __FILE__, \ + __LINE__, \ + err.c_str()); \ + abort(); \ + } \ + } while (0) + +namespace c10d { + +// NCCL type typing +static std::map ncclDataType = { + {at::kChar, ncclInt8}, + {at::kByte, ncclUint8}, + {at::kFloat, ncclFloat}, + {at::kDouble, ncclDouble}, + {at::kInt, ncclInt32}, + {at::kLong, ncclInt64}, + {at::kHalf, ncclHalf}, + {at::kBool, ncclUint8}, +#ifdef NCCL_SUPPORTS_FP8 + {at::kFloat8_e5m2, ncclFloat8e5m2}, + {at::kFloat8_e4m3fn, ncclFloat8e4m3}, +#else + {at::kFloat8_e5m2, ncclUint8}, + {at::kFloat8_e4m3fn, ncclUint8}, +#endif + // NVIDIA GPUs does not support the UZ version standing for "no negative + // zero". See https://onnx.ai/onnx/technical/float8.html + {at::kFloat8_e4m3fnuz, ncclUint8}, + {at::kFloat8_e5m2fnuz, ncclUint8}, +#if HAS_NCCL_BF16_DATATYPE + {at::kBFloat16, ncclBfloat16}, +#endif // HAS_NCCL_BF16_DATATYPE +}; + +TORCH_API size_t hashTensors(const std::vector& tensors); +TORCH_API int genNcclSplitColor(const std::vector& ranks); +TORCH_API std::string getNcclVersion(); +TORCH_API std::tuple getNcclVersionTuple(); +TORCH_API int getNcclVersionNumber(); +TORCH_API std::string ncclGetErrorWithVersion(ncclResult_t error); +int nccl_nonblocking_timeout(); + +// Provides additional detail into NCCL error codes based on when these are +// thrown in the NCCL codebase. +TORCH_API std::string getNcclErrorDetailStr( + ncclResult_t error, + std::optional processGroupFailureReason = std::nullopt); + +// Helper function that gets the data type and issues error if not supported +ncclDataType_t getNcclDataType(at::ScalarType type); + +// RAII wrapper for NCCL communicator +class NCCLComm { + using MutexType = std::recursive_mutex; + using LockType = std::unique_lock; + + public: + explicit NCCLComm(ncclComm_t ncclComm); + + NCCLComm() = default; + + ~NCCLComm() noexcept; + + void setUniqueHash(ncclUniqueId ncclId); + void setUniqueHash(std::string hash); + std::string getUniqueHash(); + + static std::shared_ptr create( + int numRanks, + int rank, + ncclUniqueId commId, + at::DeviceIndex deviceIndex); + +#ifdef NCCL_HAS_CONFIG + static std::shared_ptr create( + int numRanks, + int rank, + ncclUniqueId commId, + at::DeviceIndex deviceIndex, + ncclConfig_t& config); +#ifdef NCCL_HAS_INIT_RANK_SCALABLE + static std::shared_ptr create_scalable( + int numRanks, + int rank, + std::vector& commIds, + at::DeviceIndex deviceIndex, + ncclConfig_t& config); +#endif // NCCL_HAS_INIT_RANK_SCALABLE +#endif // NCCL_HAS_CONFIG + +#ifdef NCCL_HAS_COMM_SPLIT + static std::shared_ptr split( + NCCLComm* source, + int color_id, + int rank, + ncclConfig_t& config); +#endif // NCCL_HAS_COMM_SPLIT + +#ifdef NCCL_HAS_COMM_SHRINK + static std::shared_ptr shrink( + NCCLComm* source, + std::vector& ranks_to_exclude, + ncclConfig_t* config, + int shrinkFlags = 0); +#endif // NCCL_HAS_COMM_SHRINK + +#if (defined(IS_NCCLX) || defined(USE_ROCM)) && defined(NCCL_COMM_DUMP) + std::unordered_map ncclCommDump(); +#endif + + at::DeviceIndex getDeviceIndex(); + + // Must not be copyable + NCCLComm(const NCCLComm&) = delete; + NCCLComm& operator=(const NCCLComm&) = delete; + + // Do not support move assignment as there is no valid use case + NCCLComm& operator=(NCCLComm&& other) = delete; + + // Move constructable + // NOLINTNEXTLINE(*-noexcept-move-*) + NCCLComm(NCCLComm&& other); + + ncclComm_t getNcclComm(); + + // Wait for the communicator to be ready. This is a blocking function. + // Useful in nonblocking mode: NCCL requires the communicator to be ready + // before issuing a second command. + // Arguments: + // longInterval: if true, wait with sleep of an interval; otherwise, wait + // with `sched_yield` which is faster (but acquires CPU more frequently). + // Use `longInterval=true` when waiting for initialization or finalize to + // complete. Use `longInterval=false` when waiting collective call to return + // ncclSuccess. + void waitReady(bool longInterval); + + std::optional getNcclCommFailureReason() const; + + void abort(std::optional commFailureReason = std::nullopt); + + // Finalize a communicator -- asking it to flush its operations. When the + // communicator is marked as nonblocking, this is a nonblocking function; + // otherwise, it will block till all operations complete. + void finalize(); + + // Destroy a communicator. This is a blocking function. + void destroy(); + + bool isInitialized() const; + + bool isAborted() const; + + uint64_t getCommSplitCounter() const; + + ncclResult_t checkForNcclError(); + + // Thread-safe wrapper for ncclCommGetAsyncError that acquires the mutex + // before calling the NCCL API. This is needed because NCCL does not provide + // thread-safety guarantees for ncclCommGetAsyncError, and both the main + // thread and watchdog thread may call it concurrently. + ncclResult_t getAsyncError(ncclResult_t* asyncError); + + ncclResult_t registerSegment( + void* ptr, + size_t size, + bool errorOnRereg = true, + bool window = false); + + ncclResult_t deregisterSegment(void* ptr, bool window = false); + + std::string repr() const; + + friend class ProcessGroupNCCL; + + protected: + // Unique hash for this communicator. + std::string uniqueHash_; + bool aborted_{false}; + uint64_t ncclCommSplitCounter_{0}; + ncclResult_t ncclAsyncErr_{ncclSuccess}; + mutable MutexType mutex_; + // Rank that this communicator corresponds to. + int rank_{}; + // Optional reason for communicator failure, provided by ProcessGroupNCCL for + // better error messaging. + std::optional commFailureReason_; + bool initialized_{false}; + // Whether this communicator is using nonblocking mode. Recorded during comm + // creation or split. For safety, we give a default value of true (more + // protection). + bool nonBlocking_{true}; + // Device index for which the NCCL comm is created + at::DeviceIndex deviceIndex_{-1}; +#ifdef NCCL_HAS_COMM_REGISTER + // Stores handlers for tensors registered by NCCL + std::unordered_map registeredSegmentHandles_; +#endif // NCCL_HAS_COMM_REGISTER + + private: + ncclComm_t ncclComm_{nullptr}; +}; + +// Helper that automatically cleans up premul sums. +struct ncclRedOpRAII { + ncclRedOpRAII() = default; + ncclRedOpRAII(ncclRedOp_t op) : op_(op) {} + ncclRedOpRAII(ncclRedOp_t op, ncclComm_t comm) + : op_(op), comm_(comm), premul_sum_(true) {} + ncclRedOpRAII(const ncclRedOpRAII&) = delete; + ncclRedOpRAII& operator=(const ncclRedOpRAII&) = delete; + ncclRedOpRAII(ncclRedOpRAII&& tmp) noexcept : ncclRedOpRAII() { + std::swap(tmp.op_, this->op_); + std::swap(tmp.comm_, this->comm_); + std::swap(tmp.premul_sum_, this->premul_sum_); + } +#if defined(ENABLE_NCCL_PREMUL_SUM_SUPPORT) + ~ncclRedOpRAII() { + if (premul_sum_) { + ncclRedOpDestroy(op_, comm_); + } + } +#endif // ENABLE_NCCL_PREMUL_SUM_SUPPORT + operator ncclRedOp_t() const { + return op_; + } + ncclRedOp_t op_{}; + ncclComm_t comm_{}; + bool premul_sum_ = false; +}; + +void printNcclCommProxyTrace( + const std::string& dumpReason, + const std::unordered_map& dumpMap); +} // namespace c10d + +#endif // USE_C10D_NCCL + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NanCheck.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NanCheck.hpp new file mode 100644 index 0000000000000000000000000000000000000000..53915ea60411f46bb381ae2ed314d5e9348f97aa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NanCheck.hpp @@ -0,0 +1,21 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_NCCL + +#include +#include + +namespace c10d { + +// Check for NaNs in a tensor on a given stream. If any are found, throw a +// device-side error. +void checkForNan(const at::Tensor& tensor, at::cuda::CUDAStream& stream); + +} // namespace c10d + +#endif // USE_C10D_NCCL + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1d1b140dfa054f4326ce078b532ab2db1007f88f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp @@ -0,0 +1,185 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch { + +class TORCH_API ParamCommsDebugInfo : public c10::DebugInfoBase { + public: + ParamCommsDebugInfo() = default; + ParamCommsDebugInfo( + std::tuple pgName, + int rank, + std::string&& collName, + int64_t inNelems, + int64_t outNelems, + at::ScalarType dType, + std::vector inSplitSizes, + std::vector outSplitSizes, + int globalRankStart, + int globalRankStride, + int worldSize); + + ~ParamCommsDebugInfo() override = default; + + const std::string getProcessGroupName() const { + return std::get<0>(pgName_); + } + + const std::string getProcessGroupDesc() const { + return std::get<1>(pgName_); + } + + int getRank() const { + return rank_; + } + + int getWorldSize() const { + return worldSize_; + } + + int getGlobalRankStart() const { + return globalRankStart_; + } + + int getGlobalRankStride() const { + return globalRankStride_; + } + + const std::string getCollectiveName() const { + return collectiveName_; + } + + int64_t getInMessageNelems() const { + return inMessageNelems_; + } + + int64_t getOutMessageNelems() const { + return outMessageNelems_; + } + + at::ScalarType getDType() const { + return dType_; + } + + const std::vector& getInputSplitSizes() const { + return inputSplitSizes_; + } + + const std::vector& getOutputSplitSizes() const { + return outputSplitSizes_; + } + + const std::vector& getGroupRanks() const { + return groupRanks_; + } + + private: + std::tuple pgName_; // + int rank_{}; + int worldSize_{}; + std::string collectiveName_; + int64_t inMessageNelems_{}; + int64_t outMessageNelems_{}; + at::ScalarType dType_ = at::kByte; + std::vector inputSplitSizes_; + std::vector outputSplitSizes_; + int globalRankStart_{}; + int globalRankStride_{}; + std::vector groupRanks_; +}; + +#define RECORD_PARAM_COMMS( \ + seq, \ + pgName, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize) \ + auto paramCommsInfo = std::make_shared( \ + pgName, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize); \ + c10::DebugInfoGuard g(c10::DebugInfoKind::PARAM_COMMS_INFO, paramCommsInfo); \ + std::initializer_list paramList = { \ + seq, \ + pgName, \ + rank, \ + collName, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize}; \ + c10::ArrayRef paramInputs(paramList); \ + RECORD_FUNCTION(at::kParamCommsCallName, paramInputs); + +#define RECORD_PARAM_COMMS_DATA( \ + seq, \ + pgName, \ + InputTensors, \ + OutputTensors, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize) \ + auto paramCommsInfo = std::make_shared( \ + pgName, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize); \ + c10::DebugInfoGuard g(c10::DebugInfoKind::PARAM_COMMS_INFO, paramCommsInfo); \ + std::initializer_list paramList = { \ + c10::IValue(InputTensors), \ + seq, \ + pgName, \ + rank, \ + collName, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize}; \ + c10::ArrayRef paramInputs(paramList); \ + RECORD_FUNCTION_WITH_INPUTS_OUTPUTS( \ + at::kParamCommsCallName, \ + paramInputs, \ + std::vector(1, c10::IValue(OutputTensors))); +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PrefixStore.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PrefixStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..48411dd8182e462c0e2592f30e0714a70769cc63 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PrefixStore.hpp @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +class TORCH_API PrefixStore : public Store { + public: + explicit PrefixStore(std::string prefix, c10::intrusive_ptr store); + + c10::intrusive_ptr clone() override; + + using Store::set; + void set(const std::string& key, const std::vector& value) override; + + using Store::compareSet; + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + int64_t add(const std::string& key, int64_t value) override; + + bool deleteKey(const std::string& key) override; + + int64_t getNumKeys() override; + + bool check(const std::vector& keys) override; + + void wait(const std::vector& keys) override; + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + const std::chrono::milliseconds& getTimeout() const noexcept override; + + void setTimeout(const std::chrono::milliseconds& timeout) override; + + void append(const std::string& key, const std::vector& value) + override; + + std::vector> multiGet( + const std::vector& keys) override; + + void multiSet( + const std::vector& keys, + const std::vector>& values) override; + + // Returns true if this store support append, multiGet and multiSet + bool hasExtendedApi() const override; + + void queuePush(const std::string& key, const std::vector& value) + override; + + std::vector queuePop(const std::string& key, bool block) override; + + int64_t queueLen(const std::string& key) override; + + c10::intrusive_ptr getUnderlyingStore(); + + // Recursively to fetch the store before layers of wrapping with PrefixStore. + c10::intrusive_ptr getUnderlyingNonPrefixStore(); + + std::vector listKeys() override; + + protected: + std::string prefix_; + c10::intrusive_ptr store_; + + std::string joinKey(const std::string& key); + std::vector joinKeys(const std::vector& keys); +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroup.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroup.hpp new file mode 100644 index 0000000000000000000000000000000000000000..535b3d8c2bcd45ef7280670199fa3054fc61a520 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -0,0 +1,1037 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +// ************************************************************************* +// PROCESS GROUP collective communication API IS BEING CHANGED BETWEEN +// versions 1.7 and 1.8. +// PLEASE DO NOT ADD ANY DEPENDENCIES. +// SEE RFC: https://github.com/pytorch/pytorch/issues/39662 +// ************************************************************************* + +constexpr auto kProcessGroupDefaultTimeout = + std::chrono::milliseconds(30 * 60 * 1000); + +namespace c10d { + +// We only call `register_work()` in two cases: +// 1. If the work object is created from a functional collective call. +// 2. If the work object is created from a non-functional collective call within +// the `with allow_inflight_collective_as_graph_input_ctx()` context manager. +C10_EXPORT void register_work( + const at::Tensor& tensor, + const c10::intrusive_ptr& work); + +C10_EXPORT at::Tensor wait_tensor(const at::Tensor& tensor); + +// We only call `unregister_work()` in one case: +// 1. If the work object is created from a non-functional collective call within +// the `with allow_inflight_collective_as_graph_input_ctx()` context manager. +// +// Q: What about the functional collective case? +// A: The unregistration of work object for functional collective is done in +// the required user-side explicit call to `wait_tensor()`. +C10_EXPORT void unregister_work(const c10::intrusive_ptr& work); + +C10_EXPORT size_t get_work_registry_size(); + +C10_EXPORT void set_allow_inflight_collective_as_graph_input(bool value); + +C10_EXPORT bool allow_inflight_collective_as_graph_input(); + +// ProcessGroup is a base class that captures collective and point to +// point communication in a fixed set of processes. +// +// The functions specified in the class below describe the API alone; +// implementations are provided in subclasses. +// +// Every function that performs I/O is executed asynchronously by a +// thread pool owned by the ProcessGroup (by default). They return an +// object that can be used to wait for completion or error. +// +// The ProcessGroup can instantiate subgroups with fewer or an equal +// number of members. Implementations must take care that multiple +// process groups can be used in parallel and synchronize accordingly. +// +// The ProcessGroup assumes a fixed set of processes. If the set +// changes, existing instances must be destructed and instantiation +// and initialization must start from scratch. For members of the +// process group to find each other (referred to as rendezvous from +// hereon) +// +class TORCH_API ProcessGroup : public torch::CustomClassHolder { + public: + struct TORCH_API MergeOptions : torch::CustomClassHolder { + explicit MergeOptions( + const std::chrono::milliseconds timeout = kProcessGroupDefaultTimeout, + const std::optional group_name = std::nullopt, + const std::optional group_desc = std::nullopt) + : timeout(timeout), group_name(group_name), group_desc(group_desc) {} + ~MergeOptions() override = default; + MergeOptions(const MergeOptions&) = delete; + MergeOptions& operator=(const MergeOptions&) = delete; + + std::chrono::milliseconds timeout; + std::optional group_name; + std::optional group_desc; + }; + + enum BackendType : uint8_t { + UNDEFINED = 0, + GLOO = 1, + NCCL = 2, + UCC = 3, + MPI = 4, + XCCL = 5, + CUSTOM = 6, + }; + + static std::string backendTypeToString(const BackendType& type) { + switch (type) { + case BackendType::GLOO: + return "gloo"; + case BackendType::NCCL: + return "nccl"; + case BackendType::XCCL: + return "xccl"; + case BackendType::UCC: + return "ucc"; + case BackendType::MPI: + return "mpi"; + case BackendType::UNDEFINED: + return "undefined"; + case BackendType::CUSTOM: + return "custom"; + default: + TORCH_CHECK(false, "THis should never happen!"); + } + } + + static BackendType strToBackendType(const std::string& backend) { + if (backend == "undefined") { + return BackendType::UNDEFINED; + } else if (backend == "gloo") { + return BackendType::GLOO; + } else if (backend == "nccl") { + return BackendType::NCCL; + } else if (backend == "xccl") { + return BackendType::XCCL; + } else if (backend == "ucc") { + return BackendType::UCC; + } else if (backend == "mpi") { + return BackendType::MPI; + } else { + return BackendType::CUSTOM; + } + } + + // Not used, set for backwards compatibility and only used for TypeDef in + // Ops.cpp + explicit ProcessGroup(int rank, int size); + + explicit ProcessGroup( + c10::intrusive_ptr<::c10d::Store> store, + int rank, + int size); + ~ProcessGroup() override; + + virtual int getRank() const { + return rank_; + } + + virtual int getSize() const { + return size_; + } + + // Returns an unique opaque ID of this process group object. + int64_t getID() const { + return reinterpret_cast(this); + } + + // Returns an unique opaque ID of a backend for the specific backend type + // that can correlate with this process group's collectives. + int64_t getBackendID(BackendType backend_type) const { + return reinterpret_cast(getBackend(backend_type).get()); + } + + virtual const std::string getBackendName() const { + return backendTypeToString(backendType_); + } + + BackendType getBackendType() const { + return backendType_; + } + + inline bool backendSupportsSequenceNumbers(BackendType backendType) { + if (backendType == BackendType::GLOO || backendType == BackendType::NCCL || + backendType == BackendType::XCCL || backendType == BackendType::UCC) + return true; + return false; + } + + virtual void setTimeout(std::chrono::milliseconds timeout) { + for (auto& backend : backendTypeToBackend_) { + backend.second->setTimeout(timeout); + } + } + + int64_t incrementSplitCount() { + return splitCounter_++; + } + + virtual void startCoalescing(c10::DeviceType deviceType) { + // only nccl has implemented startCoalescing so only execute for nccl + // backends + auto backend = getBackend(deviceType); + backend->startCoalescing(); + } + + virtual c10::intrusive_ptr endCoalescing(c10::DeviceType deviceType) { + // only nccl has implemented endCoalescing so only execute for nccl + // backends + auto backend = getBackend(deviceType); + auto work = backend->endCoalescing(); + return work; + } + + virtual c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::broadcast_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + int64_t, + bool, + int64_t)>(); + // It's awakward to unbox the opts here and box them again in the custom C++ + // op. But it's also complicated to make opts as a CustomClassHolder. Leave + // it as it is now. + auto work = std::get<1>(op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.rootRank, + opts.rootTensor, + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allreduce_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + const std::optional& sparse_indices, + bool, + int64_t)>(); + + auto work = std::get<1>(op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive(opts.reduceOp), + opts.sparseIndices, + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = AllreduceCoalescedOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allreduce_coalesced_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive(opts.reduceOp), + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::reduce_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + int64_t, + int64_t, + bool, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive(opts.reduceOp), + opts.rootRank, + opts.rootTensor, + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allgather_", "") + .typed>, + c10::intrusive_ptr>( + const std::vector>&, + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool, + int64_t)>(); + + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor_list : outputTensors) { + for (const auto& tensor : tensor_list) { + c10d::register_work(tensor, work); + } + } + } + return work; + } + + // Gathers a single tensor inputBuffer into a single buffer outputBuffer that + // is interpreted as a contiguous collection of size inputBuffer * WORLD_SIZE. + // For implementers of ProcessGroup API and advanced users only. + // Note: this function will be deprecated in near future. + virtual c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::_allgather_base_", "") + .typed>( + at::Tensor&, + at::Tensor&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool, + int64_t)>(); + + auto work = std::get<1>(op.call( + outputBuffer, + inputBuffer, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(outputBuffer, work); + } + return work; + } + + // This function is deprecated and will be moved out of ProcessGroup to comms: + // * do not add dependencies on this function, + // * do not implement it in your ProcessGroup, implement _allgather_base + // instead. + virtual c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allgather_coalesced_", "") + .typed( + const std::vector>&, + const at::TensorList&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool)>(); + + auto work = op.call( + outputTensorLists, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor_list : outputTensorLists) { + for (const auto& tensor : tensor_list) { + c10d::register_work(tensor, work); + } + } + } + return work; + } + + // This function is a coalesced version of `allgather_into_tensor` (currently + // still named as `_allgather_base`). Each tensor in the vector corresponds to + // an input/output of one `allgather_into_tensor` operation. + virtual c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allgather_into_tensor_coalesced_", "") + .typed( + const at::TensorList, + const at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool)>(); + + auto work = op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::gather_", "") + .typed( + const std::vector>&, + const at::TensorList&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + bool, + int64_t)>(); + auto work = op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.rootRank, + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor_list : outputTensors) { + for (const auto& tensor : tensor_list) { + c10d::register_work(tensor, work); + } + } + } + return work; + } + + virtual c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::scatter_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + const at::TensorList&, + const std::vector>&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.rootRank, + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::reduce_scatter_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + const at::TensorList&, + const std::vector>&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive<::c10d::ReduceOp>(opts.reduceOp), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const ReduceScatterOptions& opts = ReduceScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::_reduce_scatter_base_", "") + .typed>( + at::Tensor&, + at::Tensor&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputBuffer, + inputBuffer, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive<::c10d::ReduceOp>(opts.reduceOp), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(outputBuffer, work); + } + return work; + } + + // This function is a coalesced version of `reduce_scatter_tensor` (currently + // still named as `_reduce_scatter_base`). Each tensor in the vector + // corresponds to an input/output of one `reduce_scatter_tensor` operation. + virtual c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::reduce_scatter_tensor_coalesced_", "") + .typed( + const at::TensorList, + const at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + + auto work = op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive<::c10d::ReduceOp>(opts.reduceOp), + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr alltoall_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::alltoall_base_", "") + .typed( + at::Tensor&, + at::Tensor&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + std::vector, + std::vector, + bool, + int64_t)>(); + auto work = op.call( + outputBuffer, + inputBuffer, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + outputSplitSizes, + inputSplitSizes, + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(outputBuffer, work); + } + return work; + } + + virtual c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::alltoall_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + const at::TensorList&, + const at::TensorList&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual void monitoredBarrier( + const BarrierOptions& opts, + bool wait_all_ranks = false) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::monitored_barrier_", "") + .typed&, + const std::vector&, + int64_t, + bool)>(); + // Default to using cpu implementation, monitored barrier is only for GLOO + at::Tensor tensor = at::empty({0}, at::TensorOptions().device(at::kCPU)); + op.call( + tensor, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.device_ids, + opts.timeout.count(), + wait_all_ranks); + } + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. Only implemented + // for GLOO and NCCL backends currently. + virtual void setSequenceNumberForGroup() { + auto backendType = getBackendType(); + // TODO: HACK for backend name to get sequence number for that backend. + if (backendSupportsSequenceNumbers(backendType)) { + getDefaultBackend()->setSequenceNumberForGroup(); + } else { + TORCH_CHECK( + false, + c10::str( + "ProcessGroup ", + getBackendName(), + " does not yet support sequence numbers.")); + } + } + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + virtual uint64_t getSequenceNumberForGroup() { + auto backendType = getBackendType(); + + // TODO: HACK for backend name to get sequence number for that backend. + if (backendSupportsSequenceNumbers(backendType)) { + return getDefaultBackend()->getSequenceNumberForGroup(); + } else { + TORCH_CHECK( + false, + c10::str( + "ProcessGroup ", + getBackendName(), + " does not yet support sequence numbers.")); + } + } + + virtual c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::send", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + dstRank, + tag); + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::recv_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + srcRank, + tag); + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr recvAnysource( + std::vector& tensors, + int tag) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::recv_any_source_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + tag); + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) { + static at::Tensor tensor; + // TODO: if nccl was specified then use it + auto device = opts.device; + if (device.has_value()) { + // set device tensor from argument + tensor = at::empty( + {1}, at::TensorOptions().device(device.value()).dtype(at::kByte)); + } else if (backendType_ == c10d::ProcessGroup::BackendType::NCCL) { + // set cuda tensor + tensor = at::empty( + {1}, + at::TensorOptions().device(at::DeviceType::CUDA).dtype(at::kByte)); + } else if (backendType_ == c10d::ProcessGroup::BackendType::XCCL) { + // set xpu tensor for override cpu dispatch + tensor = at::empty( + {1}, + at::TensorOptions().device(at::DeviceType::XPU).dtype(at::kByte)); + } else { + // Default to using cpu implementation + tensor = at::empty( + {1}, + at::TensorOptions().device(at::DeviceType::CPU).dtype(at::kByte)); + } + + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::barrier", "") + .typed( + at::Tensor, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const std::vector&, + bool, + int64_t)>(); + + auto work = op.call( + tensor, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.device_ids, + opts.asyncOp, + opts.timeout.count()); + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(tensor, work); + } + return work; + } + + bool hasBackends() { + return !deviceTypeToBackendType_.empty(); + } + + void setBackend( + c10::DeviceType deviceType, + BackendType backendType, + const std::optional>& backend) { + // TODO: should we add these entries after the backend setting succeeds? + deviceTypeToBackendType_[deviceType] = backendType; + deviceTypes_.insert(deviceType); + // if the backendType is already set then reuse it for this device + if (backendTypeToBackend_.find(backendType) != + backendTypeToBackend_.end()) { + auto existingBackend = backendTypeToBackend_.at(backendType); + deviceTypeToBackend_[deviceType] = existingBackend; + TORCH_CHECK( + existingBackend->getBoundDeviceId() == + (*backend)->getBoundDeviceId()); + } else { + // check if backend has value + if (backend.has_value()) { + deviceTypeToBackend_[deviceType] = backend.value(); + backendTypeToBackend_[backendType] = backend.value(); + (*backend)->setBoundDeviceId(bound_device_id_); + } + } + } + + c10::intrusive_ptr getDefaultBackend() const { + auto backend_iter = backendTypeToBackend_.find(backendType_); + TORCH_CHECK( + backend_iter != backendTypeToBackend_.end(), + "Could not find the default backend type ", + uint16_t(backendType_), + " for Process Group with name ", + getBackendName(), + "."); + return backend_iter->second; + } + + void setDefaultBackend(const BackendType& backendType) { + backendType_ = backendType; + } + + void setDefaultBackend(const std::string& backend) { + backendType_ = strToBackendType(backend); + } + + c10::intrusive_ptr getBackend(c10::DeviceType deviceType); + + c10::intrusive_ptr getBackend(BackendType backendType) const { + TORCH_CHECK( + backendTypeToBackend_.find(backendType) != backendTypeToBackend_.end(), + "Could not find backend type ", + uint16_t(backendType), + " for Process Group with name ", + backendTypeToString(backendType), + "."); + return backendTypeToBackend_.at(backendType); + } + + // Return device types supported by this ProcessGroup. + // Note: the return type is `Device` rather than `DeviceType` for the purpose + // of easy comparison at Python level. The `Device` will have default index + // (-1). + std::vector getDeviceTypes() const { + std::vector devices; + devices.reserve(deviceTypes_.size()); + for (auto& dt : deviceTypes_) { + devices.emplace_back(dt); + } + return devices; + } + + void registerOnCompletionHook( + std::function)>&& hook) { + getDefaultBackend()->registerOnCompletionHook(std::move(hook)); + } + + void waitForPendingWorks() { + getDefaultBackend()->waitForPendingWorks(); + } + + virtual void shutdown() { + for (auto& backend : backendTypeToBackend_) { + backend.second->shutdown(); + } + } + + virtual void abort() { + for (auto& backend : backendTypeToBackend_) { + backend.second->abort(); + } + } + + bool hasHooks() const { + auto backend_iter = backendTypeToBackend_.find(backendType_); + if (backend_iter == backendTypeToBackend_.end()) { + TORCH_WARN( + "No backend of type ", + uint16_t(backendType_), + " found for Process Group with name ", + getBackendName(), + ". Assuming no hooks are registered."); + return false; + } + + return backend_iter->second->hasHooks(); + } + + virtual const std::string& getGroupName() const; + virtual void setGroupName(const std::string& name); + virtual const std::string& getGroupDesc() const; + virtual void setGroupDesc(const std::string& name); + void enableCollectivesTiming(); + + void release_resources() override; + + // ProcessGroups optionally can be "bound" to a specific device. + // Currently this is only for nccl and allows for some opt-in + // optimizations such as automatic use of ncclCommSplit. The device + // is specified in `init_process_group` and eventually makes it + // here and then down into the actual backend instances. + std::optional getBoundDeviceId() const { + return bound_device_id_; + } + + c10::intrusive_ptr getStore() const { + return store_; + } + + void setBoundDeviceId(std::optional device) { + if (device) { + TORCH_CHECK(device->has_index(), "setBoundDeviceId must have an index"); + } + bound_device_id_ = device; + } + + // This creates a new subgroup using the specified ranks. + // The current rank must be included in the list of new_ranks. + virtual c10::intrusive_ptr splitGroup( + const std::vector& ranks, + const std::optional& timeout, + const std::optional>& opts, + const std::optional& name, + const std::optional& groupDesc); + + // This creates a new subgroup using the specified ranks. + // The current rank must be included in the list of new_ranks. + virtual c10::intrusive_ptr mergeRemoteGroup( + const c10::intrusive_ptr& store, + const MergeOptions& opts, + const int& size); + + protected: + // Implementations of this interface need to call this to setup + // appropriate logging etc. + void init(); + + c10::intrusive_ptr store_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int rank_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int size_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + BackendType backendType_; + std::string pg_desc_; + int64_t splitCounter_; + + // Debug level setting. It is parsed once when ProcessGroup is constructed and + // remains the same across use of this process group. + DebugLevel dist_debug_level_{DebugLevel::Off}; + + // Backend classes for this ProcessGroup + std::unordered_set deviceTypes_; + // This mapping is ordered, as splitGroup must call split on the underlying + // backends in a consistent order. + std::map deviceTypeToBackendType_; + std::unordered_map> + deviceTypeToBackend_; + std::unordered_map> + backendTypeToBackend_; + + std::optional bound_device_id_; +}; + +// Thread local functions for managing the currently active process group. +TORCH_API c10::intrusive_ptr& currentProcessGroup(); +TORCH_API void setProcessGroup(c10::intrusive_ptr processGroup); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2b4bed946be7bdaaf3601e1dd847410ef5b64f5f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp @@ -0,0 +1,503 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + +namespace c10d { + +constexpr const char* GLOO_BACKEND_NAME = "gloo"; + +// Control whether or not connections are established in a full mesh or lazily +// as needed. +static std::vector TORCH_GLOO_LAZY_INIT = {"TORCH_GLOO_LAZY_INIT"}; + +// Returns default value for lazyInit. +bool TORCH_API getDefaultGlooLazyInit(); + +// ProcessGroupGloo implements Gloo bindings for c10d. +// +// All functions on this class are expected to be called in the same +// order across processes in the group. This is the only way that we +// can guarantee to match up the same calls across processes. For +// multi-threaded usage of process groups, you can consider using +// multiple process group instances. +// +class TORCH_API ProcessGroupGloo : public Backend { + public: + // AsyncWork is the Gloo specific superclass for asynchronous work items. + // We can split asynchronous work into 3 phases: + // 1) Sanity checks and prepare input (e.g. memcpy) + // 2) Run operation on background thread + // 3) Synchronize with completion on foreground thread + // + // There is state to be shared between these 3 phases and all of this state + // is captured in the AsyncWork class and its derivatives. + // + // Note: while we are porting operations to use new style collectives, there + // is a split between operations using the existing caching approach and + // operations using the new AsyncWork base class. Over time we will port + // all operations and perform needed cleanup. + // + // FIXME: This probably should be called WorkGloo since the work is executed + // in sync mode by a background thread. + class TORCH_API AsyncWork : public Work { + public: + explicit AsyncWork( + std::shared_ptr context, + std::vector> outputTensors, + OpType opType, + uint64_t seq, + std::chrono::milliseconds timeout, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + ~AsyncWork() override = default; + + static void execute(const c10::intrusive_ptr& work); + + virtual void run() = 0; + + std::vector result() override; + + c10::intrusive_ptr getFuture() override; + uint64_t getSequencenumber() const override; + std::chrono::milliseconds getTimeout() const; + virtual const std::vector getInputTensors() = 0; + virtual const std::vector getOutputTensors() = 0; + inline std::string getProfilerTitle() const { + return profilingTitle_; + } + inline at::ThreadLocalState getTLS() const { + return tls_; + } + + protected: + friend class ProcessGroupGloo; + // unique id used to tell the trace buffer that this + // work has completed + std::optional trace_id_; + std::optional trace_reset_epoch_; + std::shared_ptr context_; + const std::chrono::milliseconds timeout_; + + private: + void finishWorkGloo(); + void finishWorkGlooError(const std::exception_ptr& eptr); + inline void recordAsyncWorkProfilingInfo( + const char* profilingTitle, + const std::optional>& inputTensors); + + const std::vector> outputTensors_; + c10::intrusive_ptr future_; + std::function recordFunctionBeforeCallback_; + const uint64_t seq_; + std::string profilingTitle_; + at::ThreadLocalState tls_; + }; + + // Wrap c10d store as Gloo store + class TORCH_API GlooStore : public ::gloo::rendezvous::Store { + public: + GlooStore(c10::intrusive_ptr<::c10d::Store> store) + : store_(std::move(store)) {} + + void setUint(const std::string& key, const std::vector& value) { + store_->set(key, value); + } + + void set(const std::string& key, const std::vector& value) override { + std::vector tmp(value.begin(), value.end()); + store_->set(key, tmp); + } + + std::vector getUint(const std::string& key) { + auto value = store_->get(key); + return value; + } + + std::vector get(const std::string& key) override { + auto value = store_->get(key); + return std::vector(value.begin(), value.end()); + } + + void wait(const std::vector& keys) override { + store_->wait(keys, ::c10d::Store::kDefaultTimeout); + } + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override { + store_->wait(keys, timeout); + } + +#ifdef GLOO_STORE_HAS_STORE_V2 + bool has_v2_support() override { + return store_->hasExtendedApi(); + } + + std::vector> multi_get( + const std::vector& keys) override { + std::vector> res; + for (auto& value : store_->multiGet(keys)) { + res.emplace_back(value.begin(), value.end()); + } + return res; + } + + void multi_set( + const std::vector& keys, + const std::vector>& values) override { + std::vector> u_values; + u_values.reserve(values.size()); + for (auto& value : values) { + u_values.emplace_back(value.begin(), value.end()); + } + store_->multiSet(keys, u_values); + } + + void append(const std::string& key, const std::vector& value) + override { + std::vector tmp(value.begin(), value.end()); + return store_->append(key, tmp); + } + + int64_t add(const std::string& key, int64_t value) override { + return store_->add(key, value); + } +#endif + + const c10::intrusive_ptr<::c10d::Store>& _getStore() const { + return store_; + } + + protected: + c10::intrusive_ptr<::c10d::Store> store_; + }; + + // For send and recv operations there is no need to pass them to the + // thread pool as they are entirely completed by the device thread. + // This work object is used to synchronize completion of the send or + // recv operation. It keeps a reference to the tensor it is + // operating on to prevent it from being deallocated while the + // operation is still in flight. + class TORCH_API SendWork : public Work { + public: + explicit SendWork( + at::Tensor& tensor, + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer, + uint64_t seq); + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override; + + void abort() override; + + uint64_t getSequencenumber() const override; + + protected: + at::Tensor tensor_; + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer_; + const uint64_t seq_; + }; + + class TORCH_API RecvWork : public Work { + public: + explicit RecvWork( + at::Tensor& tensor, + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer, + OpType opType, + uint64_t seq, + const char* profilingTitle = nullptr); + + int sourceRank() const override; + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override; + + void abort() override; + + uint64_t getSequencenumber() const override; + + protected: + at::Tensor tensor_; + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer_; + int srcRank_; + const uint64_t seq_; + }; + + struct TORCH_API Options : public Backend::Options { + explicit Options( + std::chrono::milliseconds timeout = kBackendDefaultTimeout); + + // return intrusive_ptr of the object + static c10::intrusive_ptr create( + std::chrono::milliseconds timeout = kBackendDefaultTimeout) { + return c10::make_intrusive(timeout); + } + + static c10::intrusive_ptr create_default( + std::chrono::milliseconds timeout = kBackendDefaultTimeout); + + std::vector> devices; + int threads; + }; + + const std::string getBackendName() const override { + return std::string(GLOO_BACKEND_NAME); + } + + bool supportsSplitting() const override { + return true; + } + + // Helper functions to create a new device object. + // They are static functions on this class to keep them logically + // separate from the rest of the code base (e.g. torch/csrc/distributed). + + // Create new device instance for specific interface. + static std::shared_ptr<::gloo::transport::Device> createDeviceForInterface( + const std::string& interface, + bool lazyInit = false); + + // Create new device instance for specific hostname or address. + static std::shared_ptr<::gloo::transport::Device> createDeviceForHostname( + const std::string& hostname, + bool lazyInit = false); + + // Create new device instance. + // It tries to resolve this machine's hostname and bind to that address. + // If that fails (i.e. the hostname doesn't resolve to an address), it + // falls back to binding to the loopback address. + static std::shared_ptr<::gloo::transport::Device> createDefaultDevice( + bool lazyInit = false); + + explicit ProcessGroupGloo( + const c10::intrusive_ptr& store, + int rank, + int size, + c10::intrusive_ptr options = Options::create()); + + ~ProcessGroupGloo() override; + + c10::intrusive_ptr getOptions() { + return options_; + } + + void setTimeout(std::chrono::milliseconds timeout) override { + options_->timeout = timeout; + for (auto& context : contexts_) { + context->setTimeout(timeout); + } + } + + c10::intrusive_ptr getBackendOptions() override { + return c10::static_intrusive_pointer_cast(options_); + } + + c10::intrusive_ptr split( + const c10::intrusive_ptr& store, + const std::vector& ranks, + const c10::intrusive_ptr& opts) override; + + c10::intrusive_ptr merge( + const c10::intrusive_ptr& store, + const c10::intrusive_ptr& opts, + const int& rank, + const int& size) override; + + const std::vector& groupRanks() const; + + c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_sparse( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& output_tensor, + at::Tensor& input_tensor, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputs, + std::vector& inputs, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_coalesced( + std::vector>& output_lists, + std::vector& input_list, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputs, + std::vector& inputs, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputs, + std::vector>& inputs, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputs, + std::vector>& inputs, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputCounts, + std::vector& inputCounts, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + c10::intrusive_ptr recvAnysource( + std::vector& tensors, + int tag) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + void enableCollectivesTiming() override; + + const std::shared_ptr<::gloo::rendezvous::Store>& _getStore() const { + return store_; + } + + // Similar to barrier(), but blocks rank 0 until all other ranks have + // acknowledged that they are alive (through send/recv from rank 0). Rank 0 + // is able to report all failed ranks if waitAllRanks = true, otherwise + // reports the first rank it detected as failed. + void monitoredBarrier( + const BarrierOptions& opts = BarrierOptions(), + bool waitAllRanks = false) override; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; + + int getNumThreads() { + return options_->threads; + } + + protected: + std::shared_ptr<::gloo::rendezvous::Store> store_; + const c10::intrusive_ptr options_; + + // Every Gloo context represents a set of connections to its peers. + // In order to use more than one device (or allow for parallelism on + // a single device), you need multiple contexts. + std::vector> contexts_; + std::vector threads_; + bool stop_; + + // Incremented for every collective we kick off. + // The value is used as tag for collective operations. Collectives are kicked + // off in identical order across processes. Therefore the tag can be used + // to match up operations during concurrent execution. + uint32_t collectiveCounter_; + + // Returns next collective tag to use (uses collectiveCounter_). + uint32_t nextTag(); + + // Returns the context to use for the specified tag. + // With `nextTag` returning an increasing number, this should lead + // to contexts being used in a round-robin fashion. + std::shared_ptr<::gloo::Context> getContext(uint32_t tag); + + // Entrypoint for worker threads. + void runLoop(int workerIndex); + + // Queue work to run on worker thread. + void enqueue(c10::intrusive_ptr work); + + // Keep both a queue of pending work, and a vector with in progress work. + // Both of these can only be mutated when holding the queue lock. + // We keep both around instead of just the queue, so we can grab a weak_ptr + // to all in progress and pending work when executing a barrier. + // When executing a barrier, we need to ensure that all prior work + // has completed before completing itself. + std::deque> workQueue_; + std::vector> workInProgress_; + std::mutex workMutex_; + std::condition_variable workProduceCV_; + std::condition_variable workConsumeCV_; + uint64_t seq_{0}; + size_t local_id_; + std::shared_ptr pgStatus_ = + std::make_shared(); +}; + +} // namespace c10d + +#endif // USE_C10D_GLOO + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4fa60b50e78683e889c9b153fd2e816350c1d44b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp @@ -0,0 +1,679 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define GENERATE_ALL_TYPES(type, func, ...) \ + switch (type) { \ + case ::at::ScalarType::Float: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Double: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Half: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::BFloat16: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Char: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Byte: \ + case ::at::ScalarType::Bool: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Int: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Long: \ + func(__VA_ARGS__); \ + break; \ + default: \ + TORCH_CHECK(false, "Invalid scalar type"); \ + } + +#define HOST_NAME_MAX 256 +#else +#define GENERATE_ALL_TYPES(type, func, args...) \ + switch (type) { \ + case ::at::ScalarType::Float: \ + func(args); \ + break; \ + case ::at::ScalarType::Double: \ + func(args); \ + break; \ + case ::at::ScalarType::Half: \ + func(args); \ + break; \ + case ::at::ScalarType::BFloat16: \ + func(args); \ + break; \ + case ::at::ScalarType::Char: \ + func(args); \ + break; \ + case ::at::ScalarType::Byte: \ + case ::at::ScalarType::Bool: \ + func(args); \ + break; \ + case ::at::ScalarType::Int: \ + func(args); \ + break; \ + case ::at::ScalarType::Long: \ + func(args); \ + break; \ + default: \ + TORCH_CHECK(false, "Invalid scalar type"); \ + } +#endif + +namespace c10d { + +TORCH_DECLARE_TYPED_REGISTRY( + GlooAllreduceRegistry, + c10::DeviceType, + ProcessGroupGloo::AsyncWork, + c10::intrusive_ptr, + std::shared_ptr, + std::vector&, + ReduceOp, + uint32_t, + uint64_t, + std::chrono::milliseconds); + +// This function initializes a vector of CUDA streams, one for every +// tensor in the input tensor vector, and ensures that these streams are +// synchronized with the current default streams. This is needed so +// that new work on the new streams is serialized w.r.t. all operations +// on the tensors. +TORCH_API void initializeStreamsEvents( + const std::vector& tensors, + std::vector& streams, + std::vector& events); + +// This function initializes a vector of CUDA streams, one per device, +// and ensures that these streams are synchronized with the current default +// streams. It is assumed that the tensors in the nested tensor vectors are +// on the same device. +TORCH_API void initializeStreamsEvents( + std::vector>& tensors, + std::vector& streams, + std::vector& events); + +typedef void (*ReduceFunc)(void*, const void*, const void*, size_t); + +template , int> = 0> +ReduceFunc toFunction(const ReduceOp& r) { + switch (r) { + case ReduceOp::SUM: + case ReduceOp::AVG: + return ReduceFunc(&::gloo::sum); + case ReduceOp::PRODUCT: + return ReduceFunc(&::gloo::product); + case ReduceOp::MIN: + return ReduceFunc(&::gloo::min); + case ReduceOp::MAX: + return ReduceFunc(&::gloo::max); + case ReduceOp::BAND: + TORCH_CHECK(false, "Cannot use ReduceOp.BAND with non-integral dtype"); + break; + case ReduceOp::BOR: + TORCH_CHECK(false, "Cannot use ReduceOp.BOR with non-integral dtype"); + break; + case ReduceOp::BXOR: + TORCH_CHECK(false, "Cannot use ReduceOp.BXOR with non-integral dtype"); + break; + case ReduceOp::PREMUL_SUM: + TORCH_CHECK(false, "Cannot use ReduceOp.PREMUL_SUM with Gloo"); + break; + case ReduceOp::UNUSED: + default: + break; + } + + TORCH_CHECK(false, "Unhandled ReduceOp"); +} + +// Bitwise AND with SFINAE guard for integral types. +template , int> = 0> +void band(void* c, const void* a, const void* b, size_t n) { + auto tc = static_cast(c); + auto ta = static_cast(a); + auto tb = static_cast(b); + for (const auto i : c10::irange(n)) { + tc[i] = ta[i] & tb[i]; + } +} + +// Bitwise OR with SFINAE guard for integral types. +template , int> = 0> +void bor(void* c, const void* a, const void* b, size_t n) { + auto tc = static_cast(c); + auto ta = static_cast(a); + auto tb = static_cast(b); + for (const auto i : c10::irange(n)) { + tc[i] = ta[i] | tb[i]; + } +} + +// Bitwise XOR with SFINAE guard for integral types. +template , int> = 0> +void bxor(void* c, const void* a, const void* b, size_t n) { + auto tc = static_cast(c); + auto ta = static_cast(a); + auto tb = static_cast(b); + for (const auto i : c10::irange(n)) { + tc[i] = ta[i] ^ tb[i]; + } +} + +template , int> = 0> +ReduceFunc toFunction(const ReduceOp& r) { + switch (r) { + case ReduceOp::SUM: + case ReduceOp::AVG: + return ReduceFunc(&::gloo::sum); + case ReduceOp::PRODUCT: + return ReduceFunc(&::gloo::product); + case ReduceOp::MIN: + return ReduceFunc(&::gloo::min); + case ReduceOp::MAX: + return ReduceFunc(&::gloo::max); + case ReduceOp::BAND: + return ReduceFunc(&band); + case ReduceOp::BOR: + return ReduceFunc(&bor); + case ReduceOp::BXOR: + return ReduceFunc(&bxor); + case ReduceOp::PREMUL_SUM: + TORCH_CHECK(false, "Cannot use ReduceOp.PREMUL_SUM with Gloo"); + break; + case ReduceOp::UNUSED: + default: + break; + } + + TORCH_CHECK(false, "Unhandled ReduceOp"); +} + +template +void setInputs(O& opts, std::vector& tensors) { + opts.setInputs(getDataPointers(tensors), tensors[0].numel()); +} + +template +void setInput(O& opts, at::Tensor& tensor) { + opts.setInput(getDataPointer(tensor), tensor.numel()); +} + +template +void setInput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setInput(getDataPointer(tensor), counts); +} + +template +void setInput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setInput(getDataPointer(tensor), counts); +} + +template +void setOutputs(O& opts, std::vector& tensors, int64_t count) { + opts.setOutputs(getDataPointers(tensors), count); +} + +template +void setOutput(O& opts, at::Tensor& tensor) { + opts.setOutput(getDataPointer(tensor), tensor.numel()); +} + +template +void setOutput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setOutput(getDataPointer(tensor), counts); +} + +template +void setOutput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setOutput(getDataPointer(tensor), counts); +} + +static at::Tensor pinnedLike(at::Tensor& tensor) { + auto* allocator = at::detail::getCUDAHooks().getPinnedMemoryAllocator(); + auto storage = c10::Storage( + c10::Storage::use_byte_size_t(), + static_cast(at::detail::computeStorageNbytes( + tensor.sizes(), tensor.strides(), tensor.dtype().itemsize())), + allocator, + /*resizable=*/false); + return at::empty({0}, tensor.options().device(at::kCPU)) + .set_(storage, 0, tensor.sizes(), tensor.strides()); +} + +class AsyncAllreduceWork : public ProcessGroupGloo::AsyncWork { + public: + AsyncAllreduceWork( + std::shared_ptr context, + std::vector& inputs, + ReduceOp reduceOp, + uint32_t tag, + uint64_t seq, + std::chrono::milliseconds timeout) + : ProcessGroupGloo::AsyncWork( + std::move(context), + {inputs}, + OpType::ALLREDUCE, + seq, + timeout, + "gloo:all_reduce", + inputs), + inputs(inputs), + reduceOp(std::move(reduceOp)), + tag(tag) {} + + std::vector inputs; + const ReduceOp reduceOp; + const uint32_t tag; + + void allreduce(std::vector& tensors) { + auto tensor = tensors[0]; + if (tensor.is_complex()) { + TORCH_CHECK( + c10d::isComplexViewAsRealAllowed(reduceOp), + "all_reduce does not support", + reduceOp, + "on complex tensors"); + tensor = at::view_as_real(tensor); + } + gloo::AllreduceOptions opts(context_); + const auto& scalarType = tensor.scalar_type(); + opts.setReduceFunction(getFunction(scalarType, reduceOp)); + opts.setTag(tag); + opts.setTimeout(timeout_); + // Use tensor.numel() instead of tensors[0].numel() to + // get the right number of elements when tensors[0] is complex + GENERATE_ALL_TYPES(scalarType, setOutputs, opts, tensors, tensor.numel()); + gloo::allreduce(opts); + + // Gloo doesn't support AVG so we use SUM + division. + if (reduceOp == ReduceOp::AVG) { + tensors[0] /= context_->size; + } + } + + const std::vector getInputTensors() override { + return inputs; + } + + const std::vector getOutputTensors() override { + return inputs; + } + + void run() override { + allreduce(inputs); + } + + template + void getFunction(gloo::AllreduceOptions::Func& fn, const ReduceOp op) { + fn = toFunction(op); + } + + gloo::AllreduceOptions::Func getFunction( + const at::ScalarType& dtype, + const ReduceOp& op) { + gloo::AllreduceOptions::Func fn; + GENERATE_ALL_TYPES(dtype, getFunction, fn, op); + return fn; + } +}; + +class AsyncAllreduceCoalescedWork : public AsyncAllreduceWork { + public: + AsyncAllreduceCoalescedWork( + const std::shared_ptr& context, + std::vector& inputs, + ReduceOp reduceOp, + uint32_t tag, + uint64_t seq, + std::chrono::milliseconds timeout) + : AsyncAllreduceWork( + context, + inputs, + std::move(reduceOp), + tag, + seq, + timeout) {} + + void run() override { + allreduceCoalesced(inputs); + } + + private: + void allreduceCoalesced(std::vector& tensors) { + // reduce coalesced, flattened tensors. + at::Tensor coalescedTensor = flattenDenseTensors(tensors); + std::vector allreduceInput = {coalescedTensor}; + allreduce(allreduceInput); + + // separate and reshape tensors. + size_t offset = 0; + for (at::Tensor& tensor : tensors) { + const int64_t tensorNumel = tensor.numel(); + const c10::IntArrayRef tensorShape = tensor.sizes(); + tensor.copy_(coalescedTensor.slice(0, offset, offset + tensorNumel) + .view(tensorShape)); + offset += tensorNumel; + } + } +}; + +class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork { + public: + AsyncSparseAllreduceWork( + std::shared_ptr context, + std::vector& inputs, + uint32_t tag, + uint64_t seq, + std::chrono::milliseconds timeout) + : ProcessGroupGloo::AsyncWork( + std::move(context), + {inputs}, + OpType::_ALLREDUCE_SPARSE, + seq, + timeout, + "gloo:sparse_all_reduce", + inputs), + inputs(inputs), + tag(tag) {} + + std::vector inputs; + const uint32_t tag; + + // We share dimensionality about the sparse tensors before collecting + // their contents. We assume here that the maximum number of sparse + // and dense dimensions is 4. This is stored in a contiguous piece of + // memory so that we can easily run allgather on it. + // + // The layout of this memory is as follows: + // + // - [0:4]: sparse dims + // - [4:8]: dense dims + // - [8]: nnz + // + class SparseTensorMetadata { + public: + static constexpr auto dim = 9; + + // Construct from an existing metadata tensor to facilitate structured + // access to metadata from peers, after gathering it. + explicit SparseTensorMetadata(at::Tensor metadata) + : metadata_(std::move(metadata)), + data_(metadata_.mutable_data_ptr()) { + AT_ASSERT(metadata_.scalar_type() == at::kLong); + AT_ASSERT(metadata_.dim() == 1); + AT_ASSERT(metadata_.size(0) == dim); + } + + // Populate the metadata. + void populate_from_sparse_tensor(const at::Tensor& tensor) { + const auto sparse_dim = tensor.sparse_dim(); + AT_ASSERT(sparse_dim <= 4); + for (const auto i : c10::irange(4)) { + if (i < sparse_dim) { + data_[i] = tensor.size(i); + } + } + const auto dense_dim = tensor.dense_dim(); + AT_ASSERT(dense_dim <= 4); + for (const auto i : c10::irange(4)) { + if (i < dense_dim) { + data_[i + 4] = tensor.size(sparse_dim + i); + } + } + data_[8] = tensor._nnz(); + } + + std::vector sizes() const { + std::vector sizes; + // Sparse sizes + for (const auto i : c10::irange(4)) { + if (data_[i] <= 0) { + break; + } + sizes.push_back(data_[i]); + } + // Dense sizes + for (const auto i : c10::irange(4, 8)) { + if (data_[i] <= 0) { + break; + } + sizes.push_back(data_[i]); + } + return sizes; + } + + int64_t nnz() const { + return data_[8]; + } + + protected: + at::Tensor metadata_; + int64_t* data_; + }; + + // Sparse allreduce is implemented with allgather on indices and values. + // Every process then sums the resulting sparse tensors locally. + // The nnz for sparse tensors may be different across processes, so first + // we run allgather on the nnz, and then allgather with max(nnz). + at::Tensor allreduce(std::vector& tensors) { + // TODO: This is a massive hack! There is some confusion about + // Variable/Tensor inside the body of this function. Turning off + // grad smooths over the confusion for now. This fixes + // test/test_c10d_gloo.py ProcessGroupGlooTest.test_sparse_allreduce_basics + // + // The correct fix is to stop allocating tensors that are not variables, + // but to conveniently do this c10d must depend on torch not ATen + at::AutoDispatchBelowAutograd guard; + auto input = tensors[0]; + + // Perform local reduction if we have multiple inputs. + for (const auto i : c10::irange(1, tensors.size())) { + input += tensors[i]; + } + + // Need to coalesce before we can access indices and values. + input = input.coalesce(); + + // Gather metadata information from all ranks. + auto metadata = allgather_metadata(input); + + // Sanity check dimensionality across ranks. + { + const auto expected = metadata[context_->rank].sizes(); + for (const auto i : c10::irange(context_->size)) { + if (i == context_->rank) { + continue; + } + const auto actual = metadata[i].sizes(); + TORCH_CHECK(actual == expected, "Sparse dimensions do not match"); + } + } + + // Gather all indices and all values. + auto indices = allgather_indices(input, metadata); + auto values = allgather_values(input, metadata); + + // Perform global reduction. + AT_ASSERT(static_cast(indices.size()) == context_->size); + AT_ASSERT(static_cast(values.size()) == context_->size); + auto output = at::sparse_coo_tensor( + indices[0], values[0], input.sizes(), input.options()); + for (const auto i : c10::irange(1, context_->size)) { + output += at::sparse_coo_tensor( + indices[i], values[i], input.sizes(), input.options()); + } + + // Coalesce for good measure. + return output.coalesce(); + } + + void run() override { + auto output = allreduce(inputs); + + // This copy is needed when we run a multi-gpu version of reduce (multiple + // inputs per rank). + for (const auto i : c10::irange(inputs.size())) { + inputs[i].copy_(output); + } + } + + const std::vector getInputTensors() override { + return inputs; + } + + const std::vector getOutputTensors() override { + return inputs; + } + + private: + std::vector allgather_metadata( + const at::Tensor& tensor) { + auto buffer = + at::zeros({context_->size, SparseTensorMetadata::dim}, at::kLong); + + // Prepare metadata vector (1 entry per rank) + std::vector metadata; + metadata.reserve(context_->size); + for (const auto i : c10::irange(context_->size)) { + metadata.emplace_back(buffer.select(0, i)); + } + + // Populate data for this rank + metadata[context_->rank].populate_from_sparse_tensor(tensor); + + // Allgather metadata + gloo::AllgatherOptions opts(context_); + opts.setOutput(buffer.mutable_data_ptr(), buffer.numel()); + opts.setTag(tag); + opts.setTimeout(timeout_); + gloo::allgather(opts); + + return metadata; + } + + std::vector allgather_indices( + const at::Tensor& tensor, + const std::vector& metadata) { + const auto sparseDim = tensor.sparse_dim(); + + std::vector counts(context_->size); + size_t totalSize = 0; + for (const auto i : c10::irange(metadata.size())) { + counts[i] = metadata[i].nnz() * sparseDim; + totalSize += counts[i]; + } + + auto output = at::empty({static_cast(totalSize)}, at::kLong); + + // tensors copied from cuda may not be contiguous, get a contiguous + // tensor before use its data_ptr + auto input = tensor.indices().contiguous(); + + // Allgatherv indices. + gloo::AllgathervOptions opts(context_); + opts.setInput( + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + const_cast(input.const_data_ptr()), + input.numel()); + opts.setOutput(output.mutable_data_ptr(), counts); + opts.setTag(tag); + opts.setTimeout(timeout_); + gloo::allgatherv(opts); + + // Compile indices tensor per rank. + std::vector indices; + indices.reserve(metadata.size()); + int64_t offset = 0; + for (const auto& i : metadata) { + const auto nnz = i.nnz(); + const auto numel = sparseDim * nnz; + indices.push_back( + output.narrow(0, offset, numel).reshape({sparseDim, nnz})); + offset += numel; + } + + return indices; + } + + std::vector allgather_values( + const at::Tensor& tensor, + const std::vector& metadata) { + // There are nnz #dense_dim()-dimensional tensors per rank. + const auto valueShape = tensor.sizes().slice(tensor.sparse_dim()); + int64_t denseNumel = 1; + for (auto dim : valueShape) { + denseNumel *= dim; + } + + std::vector counts(context_->size); + int64_t totalSize = 0; + for (const auto i : c10::irange(metadata.size())) { + counts[i] = metadata[i].nnz() * denseNumel; + totalSize += static_cast(counts[i]); + } + + auto output = at::empty({totalSize}, tensor.scalar_type()); + + // Allgatherv indices. + gloo::AllgathervOptions opts(context_); + // tensors copied from cuda may not be contiguous, get a contiguous + // tensor before use its data_ptr + at::Tensor valueTensor = tensor.values().contiguous(); + GENERATE_ALL_TYPES(valueTensor.scalar_type(), setInput, opts, valueTensor); + GENERATE_ALL_TYPES( + valueTensor.scalar_type(), setOutput, opts, output, counts); + opts.setTag(tag); + opts.setTimeout(timeout_); + gloo::allgatherv(opts); + + // Compile values tensor per rank. + std::vector values; + values.reserve(metadata.size()); + int64_t offset = 0; + for (const auto& i : metadata) { + const auto nnz = i.nnz(); + const auto numel = denseNumel * nnz; + auto tensorShape = std::vector({(int64_t)nnz}); + std::copy( + valueShape.begin(), + valueShape.end(), + std::back_inserter(tensorShape)); + values.push_back(output.narrow(0, offset, numel).reshape(tensorShape)); + offset += numel; + } + + return values; + } +}; + +} // namespace c10d + +#endif + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp new file mode 100644 index 0000000000000000000000000000000000000000..66d592145e2a8cc0d5ebe5a97c43e660788a6606 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp @@ -0,0 +1,278 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_MPI + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +namespace c10d { + +constexpr const char* MPI_BACKEND_NAME = "mpi"; + +// WorkEntry is the state associated with a single MPI run instance. +// It include the source Tensor list and destination Tensor list, as well as +// The actual run function that will operate either on src or dst or both. +struct WorkEntry { + explicit WorkEntry( + std::vector* srcPtr, + std::vector* dstPtr, + std::function&)> run) + : dst(dstPtr ? *dstPtr : std::vector()), run(std::move(run)) { + if (srcPtr) { + src = *srcPtr; + } + } + + // Not copyable + WorkEntry(const WorkEntry&) = delete; + // Not copy assignable + WorkEntry& operator=(const WorkEntry&) = delete; + + // For input and output tensors (in-place), we will always use src + std::vector src; + + // Copy of user provided outputs. + const std::vector dst; + + // src rank returned, for recv only + int* srcRank = nullptr; + std::function&)> run; +}; + +// ProcessGroupMPI implements MPI bindings for c10d. +// +// All functions on this class are expected to be called in the same +// order across processes in the group. This is the only way that we +// can guarantee to match up the same calls across processes. +// +// All MPI functions provided by this class is asynchronously scheduled on a +// Worker thread. Therefore, ProcessGroupMPI requires the MPI implementation +// that is used to have a minimum thread support value of MPI_THREAD_SERIALIZED. +// That is, The process may be multi-threaded, and multiple threads may make +// MPI calls, but only one at a time: MPI calls are not made concurrently from +// two distinct threads (all MPI calls are serialized). However, with +// MPI_THREAD_SERIALIZED, ProcessGroupMPI will only support a single process +// group. In other words, no more than 1 process group can be created globally. +// +// If you would like to use multiple ProcessGroupMPI, it requires your MPI +// implementation to have a thread support value of MPI_THREAD_MULTIPLE, that +// is, multiple threads may call MPI, with no restriction. +// +// Also note that ProcessGroupMPI only supports a single Tensor operation. In +// other words, the size of the input Tensor vector should always be 1. +// +// CUDA tensor can be supported if the MPI used is CUDA-aware MPI, and +// ProcessGroupMPI will automatically detect this support. +class TORCH_API ProcessGroupMPI : public Backend { + public: + class WorkMPI : public Work { + public: + explicit WorkMPI( + std::vector outputTensors, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt) + : Work(-1, OpType::UNKNOWN, profilingTitle, inputTensors), + outputTensors_(std::move(outputTensors)), + future_(c10::make_intrusive( + c10::ListType::create(c10::TensorType::get()))) {} + + std::vector result() override; + + c10::intrusive_ptr getFuture() override; + + protected: + friend class ProcessGroupMPI; + + private: + void finishWorkMPI(); + void finishWorkMPIError(const std::exception_ptr& eptr); + + std::vector outputTensors_; + c10::intrusive_ptr future_; + }; + + class AsyncWork : public Work { + public: + AsyncWork( + MPI_Request request, + std::vector outputTensors, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + ~AsyncWork() override; + + bool isCompleted() override; + + bool isSuccess() const override; + + int sourceRank() const override; + + bool wait(std::chrono::milliseconds timeout = kUnsetTimeout) override; + + void abort() override; + + std::vector result() override; + + protected: + void populateException(); + + private: + const std::vector outputTensors_; + MPI_Request request_; + MPI_Status status_{}; + }; + + // Constructor will spawn up the worker thread loop + explicit ProcessGroupMPI(int rank, int size, MPI_Comm pgComm); + + ~ProcessGroupMPI() override; + + // Abort the MPI program, needs to be called when exception is detected + void abort() override; + + const std::string getBackendName() const override { + return std::string(MPI_BACKEND_NAME); + } + + c10::intrusive_ptr broadcast( + std::vector& data, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputbuffer, + at::Tensor& inputbuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + c10::intrusive_ptr recvAnysource( + std::vector& tensor, + int tag) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + // Creating a new ProcessGroupMPI, will initialize MPI if not initialized + static c10::intrusive_ptr createProcessGroupMPI( + std::vector ranks = {}); + + protected: + using WorkType = + std::tuple, c10::intrusive_ptr>; + // Worker thread loop + void runLoop(); + // Helper function that is called by the destructor + void destroy(); + + c10::intrusive_ptr enqueue( + std::unique_ptr entry, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + bool stop_; + + std::mutex pgMutex_; + std::thread workerThread_; + + std::deque queue_; + std::condition_variable queueProduceCV_; + std::condition_variable queueConsumeCV_; + + // Global states + static void initMPIOnce(); + static void mpiExit(); + + static std::mutex pgGlobalMutex_; + static int mpiThreadSupport_; + + MPI_Comm pgComm_; +}; + +} // namespace c10d + +#endif // USE_C10D_MPI + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp new file mode 100644 index 0000000000000000000000000000000000000000..262f50440c2f6550d4449aac19f737b70c1f22bb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp @@ -0,0 +1,1535 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_NCCL + +#if defined(__linux__) +#include +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace c10d { + +// Control broadcasting of NCCL uniqueId +static std::vector TORCH_NCCL_BCAST_UNIQUEID = { + "TORCH_NCCL_BCAST_UNIQUEID"}; + +// Control EagerInit P2P serialization warning +static std::vector + TORCH_NCCL_SHOW_EAGER_INIT_P2P_SERIALIZATION_WARNING = { + "TORCH_NCCL_SHOW_EAGER_INIT_P2P_SERIALIZATION_WARNING"}; + +// Control whether to always use high priority streams +static std::vector TORCH_NCCL_HIGH_PRIORITY = { + "TORCH_NCCL_HIGH_PRIORITY"}; + +// Control whether or not wait() is blocking or non-blocking. +static std::vector TORCH_NCCL_BLOCKING_WAIT = { + "TORCH_NCCL_BLOCKING_WAIT", + "NCCL_BLOCKING_WAIT"}; + +// TODO: We want to eventually remove this variable and make users to use +// the default value (3 - SkipCleanUp). +// Control whether or not we perform Async Error Handling with NCCL. +static std::vector TORCH_NCCL_ASYNC_ERROR_HANDLING = { + "TORCH_NCCL_ASYNC_ERROR_HANDLING", + "NCCL_ASYNC_ERROR_HANDLING"}; + +// Control whether dumping debug info on watchdog +// timeout is enabled. This variable must be set together with +// TORCH_NCCL_ENABLE_MONITORING=1 and TORCH_NCCL_TRACE_BUFFER_SIZE > 0. +static std::vector TORCH_NCCL_DUMP_ON_TIMEOUT = { + "TORCH_NCCL_DUMP_ON_TIMEOUT"}; + +// Control whether to propagate NCCL errors to all ranks through TCPStore. +static std::vector TORCH_NCCL_PROPAGATE_ERROR = { + "TORCH_NCCL_PROPAGATE_ERROR"}; + +// Control whether Desync Debug is enabled. This variable must be set +// together with TORCH_NCCL_ASYNC_ERROR_HANDLING. +static std::vector TORCH_NCCL_DESYNC_DEBUG = { + "TORCH_NCCL_DESYNC_DEBUG", + "NCCL_DESYNC_DEBUG"}; + +// Enable recording start-events for all ProcessGroupNCCL collectives, and +// compute accurate collective timing per-collective. (Note: end-events are +// recorded by default. Turn on this flag can increase chances of a watchdog +// hang due to performing a CUDA event query which eventually calls +// cudaEventElapsedTime() API. +static std::vector TORCH_NCCL_ENABLE_TIMING = { + "TORCH_NCCL_ENABLE_TIMING", + "NCCL_ENABLE_TIMING"}; + +// Enable monitoring thread which aborts the process when the ProcessGroupNCCL +// Watchdog thread gets stuck and no heartbeat is detected after +// TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC. This can happen due to calling CUDA/NCCL +// APIs that may hang. It is Useful to prevent jobs being stuck for a prolonged +// time than necessary tying up cluster resources. +static std::vector TORCH_NCCL_ENABLE_MONITORING = { + "TORCH_NCCL_ENABLE_MONITORING"}; + +// Control the watchdog heartbeat timeout period after which the monitoring +// thread will abort the process. +static std::vector TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC = { + "TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC"}; + +// Whether to rethrow CUDA Errors in the watchdog (default true) +static std::vector TORCH_NCCL_RETHROW_CUDA_ERRORS = { + "TORCH_NCCL_RETHROW_CUDA_ERRORS"}; + +// The maximum number of events we store in the flight recorder's ring buffer. +// (One event could be the start or end of a collective, for example). +static std::vector TORCH_NCCL_TRACE_BUFFER_SIZE = { + "TORCH_NCCL_TRACE_BUFFER_SIZE"}; + +// Control how much extra time we will wait for dumping the debugging info +// before we exit and throws timeout exception. +static std::vector TORCH_NCCL_WAIT_TIMEOUT_DUMP_MILSEC = { + "TORCH_NCCL_WAIT_TIMEOUT_DUMP_MILSEC"}; + +// Control the interval inside the monitoring thread to check the coordinated +// signal from other ranks, e.g. to dump the debugging information. +static std::vector TORCH_NCCL_COORD_CHECK_MILSEC = { + "TORCH_NCCL_COORD_CHECK_MILSEC"}; + +// Whether to log C++ stack traces on unclean shutdown (default true) +static std::vector TORCH_NCCL_LOG_CPP_STACK_ON_UNCLEAN_SHUTDOWN = { + "TORCH_NCCL_LOG_CPP_STACK_ON_UNCLEAN_SHUTDOWN"}; + +// Whether to include only active collectives in the Flight Recorder trace +// (default false) +static std::vector TORCH_NCCL_EXTRA_DUMP_ON_EXEC = { + "TORCH_NCCL_EXTRA_DUMP_ON_EXEC"}; + +// Control whether to use CudaEventCache for the collective in watchdog thread. +// We noticed in the past when cuda global lock is held, destroying CudaEvent +// can cause a hang. +static std::vector TORCH_NCCL_CUDA_EVENT_CACHE = { + "TORCH_NCCL_CUDA_EVENT_CACHE"}; + +// Control the number of ranks each root can cover during NCCL comm init. +static std::vector TORCH_NCCL_RANKS_PER_ROOT = { + "TORCH_NCCL_RANKS_PER_ROOT"}; + +static std::vector TORCH_NCCL_NAN_CHECK = {"TORCH_NCCL_NAN_CHECK"}; + +constexpr const char* NCCL_BACKEND_NAME = "nccl"; + +constexpr const char* kStoreDumpKey = "exception_dump"; + +constexpr const char* kStoreErrorSignalKey = "remote_error"; + +constexpr const int kWorkStatusUpdatePeriodMs = 30 * 1000; // 30 seconds + +constexpr auto kProcessGroupNCCLDefaultTimeout = + std::chrono::milliseconds(10 * 60 * 1000); + +// NoHandling: do not handle asynchronous NCCL errors +// TearDown: tear down process upon error, see `WorkNCCL::handleException` +// CleanUpOnly: just clean up collectives and abort communicators without +// tearing down process SkipCleanUp: (this is a temporary option and can be +// removed in future) tear down process without cleaning up NCCL communicators. +// This should be used as a last resort in case `ncclCommAbort` itself is +// hanging +enum ErrorHandlingMode { + NoHandling = 0, + TearDown = 1, + CleanUpOnly = 2, + SkipCleanUp = 3 +}; + +#define SHOULD_CLEAN_UP(a) (a != NoHandling && a != SkipCleanUp) + +#define SHOULD_TEAR_DOWN(a) (a != NoHandling && a != CleanUpOnly) + +#define PRINT_COLLECTIVE_HASH_SIGNATURE(phase, opType, numel, hashValue) \ + LOG(WARNING) << logPrefix() << "Hash of " << phase << " to NCCL " << opType \ + << " with size " << numel << " is " << hashValue; + +// If set, ProcessGroupNCCL doesn't use recordStream calls to ensure +// caching allocator safety for tensors used on both user-facing and +// internal comm streams. +// Instead, it stashes live references to those tensors until after +// user-facing streams are synced with comm streams. +// See stashed_for_allocator_safety_ below. +static std::vector TORCH_NCCL_AVOID_RECORD_STREAMS = { + "TORCH_NCCL_AVOID_RECORD_STREAMS"}; + +// If set, ProcessGroupNCCL registers postAlloc and preFree hooks to cuda cache +// allocator so that whenever a tensor is allocated or freed, ProcessGroupNCCL +// can register/deregister the tensor on all available NCCL communicators. +static std::vector TORCH_NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK = + {"TORCH_NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK", + "NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK"}; + +#if defined(__linux__) +struct DumpPipe { + DumpPipe(int rank) { + std::string fileStem = + getCvarString({"TORCH_NCCL_DEBUG_INFO_PIPE_FILE"}, ""); + if (fileStem.empty() || + getCvarInt({"TORCH_NCCL_TRACE_BUFFER_SIZE"}, 0) <= 0) { + return; + } + TORCH_CHECK(!fileStem.empty(), "TORCH_NCCL_DEBUG_INFO_PIPE_FILE is empty"); + std::string filename = c10::str(fileStem, rank, ".pipe"); + TORCH_CHECK( + unlink(filename.c_str()) != -1 || errno == ENOENT, + "Error removing existing named pipe ", + filename, + ", Error: ", + std::strerror(errno)); + TORCH_CHECK( + mkfifo(filename.c_str(), 0666) != -1, + "Error creating named pipe ", + filename, + ", Error: ", + std::strerror(errno)); + fd_ = open(filename.c_str(), O_RDONLY | O_NONBLOCK); + LOG(INFO) << "Pipe file " << filename + << " has been opened, write to it to trigger NCCL Debug Dump."; + TORCH_CHECK(fd_ != -1, "Error opening named pipe ", filename); + } + bool shouldDump() { + if (fd_ == -1) { + return false; + } + // NOLINTNEXTLINE(*array*) + char buf[128]{}; + // non-blocking from O_NONBLOCK above. + // Ignore EINTR because we already will poll this + // again later. + ssize_t bytesRead = read(fd_, &buf, 128); + return bytesRead > 0; + } + ~DumpPipe() { + if (fd_ != -1) { + close(fd_); + } + } + + private: + int fd_ = -1; +}; +#else +struct DumpPipe { + DumpPipe(int rank) {} + bool shouldDump() { + return false; + } +}; +#endif + +// A shelf for stashing tensors between op call and `work.wait()`. +// Used in case of async ops. +class TensorShelf { + public: + // Stash tensors so that CachingAllocator cannot recycle them prematurely. + void stash(std::vector& tensors); + // Stash tensors from another shelf. + void stash(TensorShelf& other); + // Unstage the stashed tensors so that CachingAllocator can recycle them. + // Same as `clear()`. + void unstash(); + // Whether shelf is empty. + bool empty(); + // Clear the shelf. + void clear(); + + protected: + // Get the inner tensor vector. Use with caution as it is not protected by + // mutex. + std::vector& get(); + + private: + std::vector tVector_; + // Need a mutex to protect `tVector_` because it can be potentially accessed + // from both main thread and watchdog thread. + std::mutex mutex_; +}; + +// ProcessGroupNCCL implements NCCL bindings for c10d. +// +// All functions of the class are expected to be called in the same order +// across all processes in the process group. This is the only way that we +// can guarantee to match up the same calls among all processes. +// +// All NCCL functions provided by this class are asynchronous functions. More +// specifically, each NCCL call is scheduled on a separate CUDA stream that is +// different from the current CUDA stream. This is for the purpose of +// achieving potentially concurrency and better performance. As a result, +// it is the callers' responsibility to make sure that the CUDA stream their +// code works on needs to wait for the NCCL operation from +// this class. +// +// This can be done by calling: +// +// either WorkNCCL::wait() or WorkNCCL::synchronize(), both achieves the same +// functionality and are synonyms. +// +// Also note that WorkNCCL::finishedGPUExecution() is a helper function only +// provided by ProcessGroupNCCL to check if the NCCL operation of WorkNCCL has +// finished execution on the GPU (not just scheduled). +// +// Example on using the NCCL process group +// +// ProcessGroupNCCL pg(store, rank, size); +// std::shared_ptr work = pg.allreduce(tensors); +// +// // At this point, NCCL kernel has already by queued successfully +// // Now, let current stream wait for the NCCL to finish, this function is +// // async operation as well +// +// work->wait() +// +// // Now continue on other work in the current stream. +class TORCH_API ProcessGroupNCCL : public Backend { + public: + class WorkNCCL : public Work, public std::enable_shared_from_this { + public: + friend struct WorkInfo; + + // Constructor takes a list of CUDA devices + WorkNCCL( + std::string pgUID, + std::string pgDesc, + at::Device& device, + int rank, + OpType opType, + uint64_t seq, + bool isP2P = false, + const char* profilingTitle = nullptr, + const std::optional>& inputs = std::nullopt, + bool enableTiming = false, + bool cudaEventCacheEnabled = false, + DebugLevel distDebugLevel = DebugLevel::Off); + // Copy constructor doing partial copy without outputs_. Cleanup thread + // monitors and removes finished works. However it will deadlock when + // destructs outputs_ tensors who are view tensors in autograd graph. + WorkNCCL(const WorkNCCL& w); + + ~WorkNCCL() override = default; + + // Checks if the NCCL kernel has started to execute. + bool isStarted(); + + // Checks if request has completed. In this specific case of NCCL, it checks + // if the NCCL operation has completed on the GPU in its own NCCL stream. + // Non-blocking operation. + bool isCompleted() override; + + bool isSuccess() const override; + + // Same as calling synchronize() for NCCL work if timeout is not set. + // Otherwise, it will block the CPU thread until the NCCL work is completed + // or timed out. If timeout, exception will be thrown. + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override; + + void blockCurrentStream() override { + synchronize(); + } + + void abort() override; + + // Let current stream wait on the completion of the NCCL work + // Throws on exceptions. + void synchronize() override; + + // Synchronize streams by blocking each on the NCCL stream + void synchronizeStream(); + + // Helper function to handle exception (throw if needed). + void handleException(ErrorHandlingMode asyncErrorHandling); + + // Helper function that checks if the NCCL kernels have finished + // execution on the GPUs + bool finishedGPUExecution(); + + // Get a Future object that will be marked as completed internally. + c10::intrusive_ptr getFuture() override; + + // Get a Future result of each work (e.g. success, different error types). + // instead of the tensor output. + c10::intrusive_ptr getFutureResult() override; + + float getDuration() const override; + + uint64_t getSequencenumber() const override; + + const std::string& logPrefix() const; + + // Helper function that sets an exception_ptr on the WorkNCCL object. + void setException(std::exception_ptr exception_ptr); + + // Helper function that returns True if the WorkNCCL object has timed out + // and False otherwise. + // In case of timeout, set exception on the WorkNCCL object. + bool checkTimeout( + std::optional timeout = std::nullopt); + + // Print the traceback of the collective at call time + void printTraceback() const; + + std::string getTraceback() const; + + std::vector result() override; + + protected: + // The process group unique id + std::string pgUID_; + + // The process group description + std::string pgDesc_; + + // The cached list of CUDA devices to operate on + at::Device device_; + + // The start CUDA event of NCCL operator tracking this work item. These + // start CUDA events are needed by desync debugging if enabled. + std::shared_ptr ncclStartEvent_; + + // The end CUDA event of NCCL operator tracking this work item. + std::shared_ptr ncclEndEvent_; + + // The NCCL communicator used for this work item. + std::shared_ptr ncclComm_; + + // whether this work is a barrier op + bool isBarrierOp_{false}; + + // Clone of blockingWait_ from ProcessGroupNCCL. + bool blockingWait_{false}; + + // Clone of opTimeout_ from ProcessGroupNCCL. + std::chrono::milliseconds opTimeout_{}; + + // Ephemeral timeouts are owned by exactly one work, + // and reset after that work completes. + // There may be more than one ephemeral timeout active at the same time, + // and this variable is used to track the ownership of ephemeral timeout. + std::chrono::milliseconds ownedEphermeralTimeout_ = + std::chrono::milliseconds(0); + + // Time point representing when the work started. + std::chrono::time_point workStartTime_; + + // Record the sequential number of collective or p2p. + uint64_t seq_; + bool isP2P_; + + // Indicates if the nccl start event has been updated to the store trace. + // This will be used by desync debug. + bool startTraceUpdated_{false}; + + // Record collective sizes for debug. We only record the size on the first + // device as multi-device per process is deprecated + size_t numelIn_ = 0; + size_t numelOut_ = 0; + + // Wrapper method for the static checkForNCCLErrors which can be overridden + // for tests. + virtual std::exception_ptr checkForNCCLErrors(); + + friend std::ostream& operator<<( + std::ostream& output, + const WorkNCCL& workNCCL); + + // Checks for NCCL errors and sets an appropriate exception_ptr. + void checkAndSetException(); + + // Just checks whether GPU execution has started, without modifying + // exception_ptr. + bool startedGPUExecutionInternal() const; + + // Just checks whether GPU execution has completed, without modifying + // exception_ptr. + bool finishedGPUExecutionInternal() const; + + // Reference to the store so that we can write aborted communicators + // to the store. + c10::intrusive_ptr store_; + + // Store a reference to NCCL collective's outputs, used by result and to + // give a more descriptive message when representing the Work as a string. + std::shared_ptr> outputs_; + + // TORCH_NCCL_AVOID_RECORD_STREAMS implementation helper. + // Stores references to participating non-output tensors (ie inputs, + // flattened intermediates). + // We'll clear this list in synchronizeStream, just after user-facing + // stream(s) are synced with the nccl work stream(s). + // By keeping these refs (as well as outputs_) alive until after the + // collective's work rejoins the user-facing streams, we achieve + // caching allocator safety without any recordStream calls. + // For in-place collectives, some refs stashed here may alias outputs_, + // but that doesn't do any harm. + std::shared_ptr stashed_for_allocator_safety_; + + // The future returned by getFuture. + c10::intrusive_ptr future_; + + // the future result (e.g., success or failure) of the work + c10::intrusive_ptr futureWorkResult_; + + bool timingEnabled_; + // unique id used to tell the trace buffer that this + // work has completed + std::optional trace_id_; + std::optional trace_reset_epoch_; + DebugLevel distDebugLevel_; + friend class ProcessGroupNCCL; + }; + + struct Options : Backend::Options { + // NOTE: timeout in ProcessGroupNCCL::Options denote the timeout for + // operations. This is only used when blockingWait_ is enabled. + explicit Options(bool is_high_priority_stream = false); + + // return intrusive_ptr of the object + static c10::intrusive_ptr create( + bool is_high_priority_stream = false) { + return c10::make_intrusive(is_high_priority_stream); + } + + // Schedule NCCL operations on high priority CUDA streams + bool is_high_priority_stream; + +#ifdef NCCL_HAS_CONFIG + // Configure ranks + ncclConfig_t config = NCCL_CONFIG_INITIALIZER; +#endif + + // Optional "parent" backend and color to create communicators from + // via `ncclCommSplit` + c10::intrusive_ptr split_from; + // Color to use for `ncclCommSplit`, values: + // * Non-negative value: in group; + // * NCCL_SPLIT_NOCOLOR (-1): not in group; + // * NCCL_SPLIT_NOCOLOR - 1: uninitialized. + // [Note 1]: the type must be `int` instead of `int64_t` because NCCL API + // accepts int. Otherwise, an implicit conversion may happen at the API call + // and the value may become negative. + // [Note 2]: this member is pybinded to Python, the value passed from Python + // must be within the numerical range of C++ int. Otherwise, Python will + // raise a RuntimeError saying type is incompatible. See also + // `_process_group_color` in `distributed_c10d.py`. +#ifdef NCCL_HAS_COMM_SPLIT + int split_color{NCCL_SPLIT_NOCOLOR - 1}; +#else + // [Note 3]: for older NCCL versions, NCCL_SPLIT_NOCOLOR is not defined. But + // `split_color` is pybinded to Python, so we need to define it. So we use + // the int value of `NCCL_SPLIT_NOCOLOR` (-1) instead. + int split_color{-2}; +#endif + }; + + // Helper class related to TORCH_NCCL_DESYNC_DEBUG + class DesyncDebugger { + public: + // Initialize and enable DesyncDebugger + void init( + int rank, + int size, + int globalRank, + int pgId, + c10::intrusive_ptr store); + + // Run desync debug. This function is called by watchdog at time of timeout. + void run(); + + // Log work start to store. + void logWorkStart(WorkNCCL& work); + + // Log work end to store. + void logWorkEnd(WorkNCCL& work); + + private: + // Whether desync debug is enabled. + // If false, all functions are no-op. + bool enabled_{false}; + + // From ProcessGroupNCCL + int rank_; + int size_; + int globalRank_; + int pgId_; + + // Reference to the store so that we can log start/end event. + c10::intrusive_ptr store_; + + // The store keys to trace the last NCCL collective kernel CUDA events - + // start event and end event respectively. These are used to do desync root + // cause analysis. + std::string traceKeyStart_; + std::string traceKeyEnd_; + }; + + // Class that runs as a separate thread aside from watchdog + // thread because we need to check the heartbeat from watchdog thread + // so that when we get stuck in some NCCL/CUDA calls, + // we can dump the debugging information and abort the process. + class HeartbeatMonitor { + public: + HeartbeatMonitor(ProcessGroupNCCL* pg); + virtual ~HeartbeatMonitor() = default; + + // Start the heartbeat monitor thread. + void start(); + + // Join the heartbeat monitor thread. + void join(); + + // Run the actual loop to check watchdog heartbeat. + virtual void runLoop(); + + // Set the terminal flag and notify the heartbeat monitor thread to stop. + void stop(); + + // Set the last update time of watchdog thread. + void setLastWorkListUpdateTime( + std::chrono::time_point time); + + int getDumpTimeout() const; + + // Util function to get the timeout error message + std::string getNCCLWatchdogTimeoutErrorMsg(const std::string& extraMsg); + + // Util function to get the timeout exit message + std::string getNCCLWatchdogTimeoutExitMsg(const std::string& exitReason); + + protected: + // We need to keep a reference to the PG instance so that we can access + // the member functions of the PG instance. We store a raw pointer on + // purpose because the heartbeat monitor thread now still lives within the + // lifetime of the PG instance. + ProcessGroupNCCL* pg_; + + private: + // Whether or not to print C++ stack traces to logs on unclean shutdown. + bool logCppStackOnUncleanShutdown_; + + // The time interval used for deciding whether there is no watchdog + // heartbeat. + int heartbeatTimeoutInSec_; + + // timeout for the dump to finish. + int waitTimeoutDumpInMilSec_; + + // Interval of check coordinated signals in ProcessGroupNCCL from other + // ranks e.g., trigger the dump of the debugging info for timeout when + // notified. + int coordCheckIntervalMilSec_; + + // We gate the heartbeat monitor thread so that we can roll it out + // gradually. + bool watchdogHeartbeatMonitorEnabled_; + + // Monitor thread which checks the heartbeat of Watchdog thread. + // If the monitor thread finds there is no heartbeat, it will dump debug + // info and then kill the watchdog thread to avoid hang. + std::thread ncclHeartbeatMonitorThread_; + + // Whether or not we should terminate the heartbeat monitoring threads. + std::atomic terminateHeartbeatMonitorThread_{false}; + + // Condition Variable for monitor thread to wake up early + std::condition_variable monitorWakeUpCV_; + + // Whether or not to dump debug info on exception including both watchdog + // timeout and nccl errors. + bool dumpOnTimeoutOrEx_; + + // Mutex to Guard monitorWakeUpCV_ + std::mutex monitorMutex_; + + // The last update time of WorkList inside watchdog thread. + std::chrono::time_point lastWorkListUpdateTime_; + }; + + // Class that runs as a side thread to check whether the NCCL collective + // is timed out or errors on the cached NCCL communicators. + class Watchdog { + public: + Watchdog(ProcessGroupNCCL* pg); + virtual ~Watchdog() = default; + + // Start the watchdog thread. + void start(); + + // Join the watchdog thread. + void join(); + + // Function that runs as part of a separate thread and checks for errors on + // NCCL communicators. We need a separate thread to check for NCCL errors + // since we can't rely on the user calling certain methods like wait(), + // isCompleted() etc. to detect and remediate errors. In addition to this, + // we need a mechanism to safely abort and remove NCCL communicators from + // our cache. This can be done cleanly by having a thread for the + // ProcessGroupNCCL class. Attempting to modify the communicator cache from + // the WorkNCCL class might run into issues with object lifetime since the + // ProcessGroupNCCL object might get destroyed before the WorkNCCL object. + void run(); + + // Watchdog's inside loop. + // Takes care of cleaning up completed work, and aborting upon failure or + // timeout. + void runLoop(); + + // Notify the loop inside watchdog. + void notify(); + + void checkAndSetRemoteError(); + + // A helper function to get the src rank of a signal from the Store. This is + // nonblocking function returning -1 if the signal is not available yet. + int getSignalSrcRank( + c10::intrusive_ptr& store, + const std::string& signal); + + uint64_t getHeartbt() const; + + void setDesyncDebug(bool desyncDebug); + + private: + std::thread ncclCommWatchdogThread_; + + // We need to keep a reference to the PG instance so that we can access + // the member functions of the PG instance. We store a raw pointer on + // purpose because the watchdog thread now still lives within the + // lifetime of the PG instance. + ProcessGroupNCCL* pg_; + + // Whether the NCCL watchdog should rethrow CUDA errors. + bool rethrowCUDAErrors_ = false; + + std::exception_ptr watchDogException_ = nullptr; + + // Condition Variable for watchdog thread sleep + std::condition_variable workMetaListCV_; + + // Heartbeat of watchdog thread. + std::atomic_uint64_t heartbeat_; + + // Whether or not to propagate detected errors to all ranks in the same PG + // through TCPStore. + bool propagatePgError_; + + // Whether or not to enable timeout root cause analysis. + bool desyncDebug_; + + DesyncDebugger desyncDebugger_; + }; + + // If you wish to create multiple process groups, each with a potentially + // different rank and size, you can do so by passing a new store instance + // to each one. If you have only a single store object, you can + // use the `c10d::PrefixStore` to derive scoped instances. + // This is also what the Python API in torch.distributed does. + // + // The process group instance keeps a reference to the store because + // it may be used long after the constructor runs. In fact, the constructor + // doesn't create any NCCL communicators. A single NCCL communicator can + // only be used on a specific set of devices, and are therefore created + // on-demand when a collective runs. If another collective is executed later, + // against a different set of devices, the process group creates another NCCL + // communicator. These NCCL communicators are cached and reused if possible. + // + ProcessGroupNCCL( + c10::intrusive_ptr store, + int rank, + int size, + c10::intrusive_ptr options = Options::create()); + + // This constructor includes the deprecated `groupName` argument. + // If you have existing code that uses the `groupName`, you can replace + // it by specifying a `c10d::PrefixStore(groupName, store)` for store. + C10_DEPRECATED ProcessGroupNCCL( + const c10::intrusive_ptr& store, + int rank, + int size, + const std::string& groupName, + c10::intrusive_ptr options = Options::create()) + : ProcessGroupNCCL(store, rank, size, std::move(options)) {} + + ~ProcessGroupNCCL() override; + + // This function returns a local uid for ProcessGroupNCCL. + uint64_t getUid() { + return static_cast(local_id_); + } + + c10::intrusive_ptr getOptions() { + return options_; + } + + c10::intrusive_ptr getBackendOptions() override { + return c10::static_intrusive_pointer_cast(options_); + } + + const std::string getBackendName() const override { + return std::string(NCCL_BACKEND_NAME); + } + + bool supportsSplitting() const override { + return true; + } + + bool supportsCoalescing() const override { + return true; + } + + bool supportsTimeEstimation() const override { +#ifdef NCCL_SIM_INFO_INITIALIZER + return true; +#else + return false; +#endif + } + + void setTimeout(std::chrono::milliseconds timeout) override { + options_->timeout = timeout; + } + + void startCoalescing() override; + + c10::intrusive_ptr endCoalescing() override; + + void startTimeEstimate(); + + float endTimeEstimate(); + + // For specifying a composite optype, such as ALLGATHER and REDUCE_SCATTER + c10::intrusive_ptr endCoalescing(OpType optype); + + c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr _broadcast_oop( + at::Tensor& outputTensors, + at::Tensor& inputTensors, + const BroadcastOptions& opts = BroadcastOptions()); + + c10::intrusive_ptr allreduce_sparse( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr _reduce_oop( + at::Tensor& outputTensors, + at::Tensor& inputTensors, + const ReduceOptions& opts = ReduceOptions()); + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputbuffer, + at::Tensor& inputbuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + int64_t getCommPtr(); + + void groupStart(); + + void groupEnd(); + + void groupEndNonblocking(const std::shared_ptr& comm); + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + // Unsupported Ops + c10::intrusive_ptr recvAnysource( + std::vector& tensors, + int tag) override; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; + + // Return the total number of splits the communicators held by this process + // group have performed. Counts ncclCommCreateFromRanks() for ncclx v2.21.5+ + uint64_t getCommSplitCounter() const; + + void registerOnCompletionHook( + std::function)>&& hook) override; + void waitForPendingWorks() override; + + void enableCollectivesTiming() override; + + c10::intrusive_ptr split( + const c10::intrusive_ptr& store, + const std::vector& ranks, + const c10::intrusive_ptr& opts) override; + + c10::intrusive_ptr merge( + const c10::intrusive_ptr& store, + const c10::intrusive_ptr& opts, + const int& rank, + const int& size) override; + + // Helper function for iteratively aborting communicators in the provided map + void abortCommsFromMap( + std::unordered_map>& ncclCommsMap, + const std::optional& abortReason); + + c10::intrusive_ptr initIntraNodeComm(); + + // Destroy (shutdown) this backend -- normal exit. + void shutdown() override; + + // Provides an API to abort the ProcessGroup (similar to ncclCommAbort) + // instead of relying on ProcessGroupNCCL destructor. + void abort() override; + + void eagerConnectSingleDevice(at::Device device) override; + + void performNocolorSplit(at::Device device); + + // If all comms on this PG are fully initialized, return true. + bool isInitialized(); + + ErrorType getError() override; + + bool supportsShrinking() const override { +#ifdef NCCL_HAS_COMM_SHRINK + return true; +#else + return false; +#endif + } + + // Backend-style shrink override that returns a Backend instance. + c10::intrusive_ptr shrink( + const std::vector& ranks_to_exclude, + int shrink_flags = 0, + const c10::intrusive_ptr& opts_override = + nullptr) override; + + std::shared_ptr getMemAllocator() override; + + // Allocate tensor from communication-optimized memory pool + at::Tensor allocateTensor(long size, at::TensorOptions options = {}) override; + + // Whether tensor allocation from NCCL memory pool is supported + bool supportsTensorAlloc(c10::DeviceIndex deviceIdx) override; + + // Performs NCCL user buffer registration for all buffers in + // the given MemPool + void registerMemPool(at::cuda::MemPool* pool, bool symm = false); + + // Performs NCCL user buffer de-registration for all buffers in + // the given MemPool + void deregisterMemPool(at::cuda::MemPool* pool); + + // This method adds a temporary extension for the timeout period, + // applying to all collectives between the calling of this API and + // the completion of the first collective on the GPU. While this feature + // provides flexibility in specific scenarios, it introduces statefulness + // to timeout setting. Therefore, it is advisable to use this API sparingly + // and consider alternative approaches, such as directly setting the timeout + // or utilizing a barrier collective (one can set any timeout to the barrier), + // whenever feasible. + void addEphemeralTimeout(const std::chrono::milliseconds& timeout); + + // This function is only intended for testing purposes because we don't + // want to expose the `WorkNCCL` via pybind. It verifies whether the + // `opTimeout_` of the provided WorkNCCL instance is the same as the specified + // timeout. + bool verifyWorkTimeoutForTest( + const c10::intrusive_ptr& work, + const std::chrono::milliseconds& timeout); + + void setEnableNanCheck(bool enableNanCheck); + + protected: + uint64_t getWatchdogHeartbt() const; + + // Instance of the heartbeat monitor thread. + std::unique_ptr heartbeatMonitor_; + + // Instance of the watchdog thread. + std::unique_ptr watchdog_; + + // Helper that broadcasts nccl unique ID to all ranks through the store + void broadcastUniqueNCCLID( + ncclUniqueId* ncclID, + bool isSingleP2POp, + const std::string& devicesKey, + int p2pRank); + + // Helper that allgathers nccl unique IDs to all ranks through the store + void allgatherUniqueNCCLIDs( + int rootIdx, + ncclUniqueId* ncclID, + std::vector& ncclIDs); + + // Helper that looks up the cached NCCL communicators only + std::shared_ptr getNCCLComm(const std::string& deviceKey); + + std::shared_ptr initNCCLComm( + const std::string& deviceKey, + at::Device& device, + OpType opType, + int p2pRank = 0, + bool isSendRecvSelf = false); + + // Initialize device-specific state (comm, stream, event, bookkeeping) for a + // given communicator on this process group instance. + void initializeDeviceStateForComm( + const at::Device& device, + std::shared_ptr comm); + + // Wrapper method which can be overridden for tests. + virtual std::exception_ptr checkForNCCLErrors( + std::shared_ptr& ncclComm); + + // Ensure thaht if record is True, the work obj will be enqueued via + // workEnqueue + virtual c10::intrusive_ptr initWork( + at::Device& device, + int rank, + OpType opType, + bool isP2P, + const char* profilingTitle = nullptr, + const std::vector& inputs = {}, + const std::vector& outputs = {}, + bool record = false); + + // In the timeout case and we will dump debug info such as the NCCL flight + // recorder to storage. Down the road, if we have more complicated or blocking + // operations, we might need to use a side thread to do it. + bool dumpDebuggingInfo( + bool includeStackTrace = true, + bool onlyActive = false); + + void dumpExtraDebuggingInfo(); + + // Abort all communicators on this rank. + bool abortComms(const std::optional& abortReason = std::nullopt); + + // A helper function to check if nonblocking API mode should be used. + // Use this helper instead of directly checking `useNonblocking_` variable. + bool useNonblocking(); + + protected: + int globalRankStart_{}; + int globalRankStride_{}; + + private: + bool eagerInit_{false}; + bool showSerializationWarning_{true}; + + // Helper that encapsulates work shared across all collective communication + // primitives. The callbacks have the following signatures: + // + // ncclResult_t fn(at::Tensor& input, at::Tensor& output, + // ncclComm_t, at::cuda::CUDAStream&); + // void {pre,post}(std::vector); + template + c10::intrusive_ptr collective( + at::Tensor& input, + at::Tensor& output, + Fn fn, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr, + bool nanCheck = true); + + template + c10::intrusive_ptr collective( + at::Tensor& input, + at::Tensor& output, + Fn fn, + PreProcess pre, + PostProcess post, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr, + bool nanCheck = true); + + template + c10::intrusive_ptr collective( + std::vector& inputs, + std::vector& outputs, + Fn fn, + PreProcess pre, + PostProcess post, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr, + bool nanCheck = true); + + template + c10::intrusive_ptr collectiveCoalesced( + std::vector& input, + std::vector& output, + Fn fn, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr); + + // Helper that encapsulates work shared across point-to-point communication + // primitives. It is the same structure as the helper used for collective + // communication primitives. + template + c10::intrusive_ptr pointToPoint( + at::Tensor& tensor, + Fn fn, + int peer, + OpType opType, + const char* profilingTitle = nullptr); + + template + c10::intrusive_ptr pointToPoint( + at::Tensor& tensor, + Fn fn, + int peer, + OpType opType, + PreProcess pre, + PostProcess post, + const char* profilingTitle); + + c10::intrusive_ptr allreduce_impl( + at::Tensor& tensor, + const char* profilingTitle = "nccl:all_reduce", + const AllreduceOptions& opts = AllreduceOptions()); + + // Checks for NCCL errors on each of the communicators and returns an + // appropriate exception_ptr (nullptr if no errors). + static std::exception_ptr checkForNCCLErrorsInternal( + std::shared_ptr& ncclComm); + + // Return the CUDA device most likely associated with this backend. + // If we aren't bound to a specific device, there is no strict + // guarantee that this heuristic is the correct assignment of ranks + // to GPUs that Python layers use, but in practice it tends to be. + // Fortunately we don't rely on this for correctness of any tensor + // operations, just for ancillary uses like barriers. + at::Device guessDeviceForRank() const; + + // Destroys initialized NCCL communicators in devNCCLComMap_ given by input + // key. Throws if there are no communicators to destroy. Also removes + // communicators from the cache and clears used device indices. + void destroyNCCLComms(const std::string& devNCCLCommMapKey); + + void runHookLoop(); + + // Generates a prefix that is unique to this process group and rank, for + // disambiguating logs + std::string createLogPrefix() const; + + // Returns the unique prefix created in createLogPrefix + const std::string& logPrefix() const; + + // Returns the global rank of the device. This function assumes that users + // always create a default global process group(PG) which includes all + // devices. It is called in the constructor of ProcessGroupNCCL, so it always + // return the rank_ of the very first PG created, aka, default global PG. + const int& globalRank() const; + + const c10::intrusive_ptr& globalStore() const; + + // Returns the global ranks of a PG. + const std::vector& groupRanks() const; + + // Util function to assign timeout to each work. + void assignTimeoutToWork( + const c10::intrusive_ptr& work, + const c10::intrusive_ptr& option); + + // Broadcast flight-recorder dump signal + void broadcastDumpSignal(); + + // A helper function to broadcast a signal (key) from a src rank to all other + // ranks using the specified store. + void broadcastSignal( + c10::intrusive_ptr& store, + const std::string& signal, + int srcRank); + + protected: + // Function that directly trigger std::abort so that the whole process + // gets terminated. + virtual void terminateProcess(const std::string& errMsg); + + // A helper function to wait for a future to complete or timeout. + // Returns true if the future completes before timeout, false otherwise. + bool waitForFutureOrTimeout( + std::future& fut, + const std::chrono::milliseconds& timeOutMilSec, + const std::string& futDescription, + ::c10d::C10dLoggingData& debugLog, + bool throwException = false); + + // A helper function to guess the device id of the current rank, based on + // bounded device or used device. Do not use this function if you already know + // the device id to operate on. + c10::DeviceIndex guessDeviceId() const; + + static const int64_t kWatchdogThreadSleepMillis; + + // The store is used to broadcast the NCCL unique ID of rank 0. This store + // comes with prefix and it is different across ProcessGroup NCCL instances + // (aka, different ProcessGroups). + c10::intrusive_ptr store_; + + // Reference to the store without prefix so that keys are same across all + // ProcessGroup NCCL instances and (key, value) pairs written to the store are + // global. + c10::intrusive_ptr globalStore_; + + // The lock which protects the write/read of + // ephemeralTimeoutActive_/ephemeralTimeoutInflight_. + // TODO(fduwjj): We need to have an audit on all mutexes we are adding here. + // And consolidate them if possible. + std::mutex mtxTimeoutExtension_; + + // The ephemeral timeout added on top of existing timeout for works issued + // before first work finishes. + std::chrono::milliseconds ephemeralTimeoutActive_ = + std::chrono::milliseconds(0); + + // The ephemeral timeout addition which has been already applied to work. + std::chrono::milliseconds ephemeralTimeoutInflight_ = + std::chrono::milliseconds(0); + + const c10::intrusive_ptr options_; + + // The number of NCCL communicators that have been created during + // the lifetime of this process group. This sequence number is + // used to scope keys used in the store. + uint64_t ncclCommCounter_{0}; + + // The NCCL communicator that the process group has cached. + // + // For collective operations: + // The key is a list of GPU devices that an operation is operating on + // The GPU devices are stored in a device sequence and the cache NCCL + // communicator is associated with this GPU device sequence + // + // e.g. If the process group op only uses device 0, then the value of + // the used device string stored (value of the hashmap) would be "0". + // + // If the process group op uses device 0 - 7 and the each tensor of the + // input tensor list is on device, 0, 1, 2, 3, 4, 5, 6, 7 separately, + // then the value of the used device string (key) stored would be + // "0,1,2,3,4,5,6,7" + // + // If the process group op uses device 0 - 7 and the each tensor of the + // input tensor list is on device, 0, 4, 5, 6, 7, 1, 2, 3 separately, + // then the value of the used device string stored would be + // "0,4,5,6,7,1,2,3" + // + // Note that the order of the device for the tensor list matters. + // + // For point-to-point operations: + // The key is a string of my current rank and the peer process rank. + // e.g. If process 1 and process 2 are involved in a point-to-point + // communication, the key will be "1:2" on both processes. Note: this is for + // the scenario where there is only 1 GPU per process. When it comes to + // multiple GPUs per process, this part may need to redesigned. + // TODO: we probably need a separate map for P2P comms + std::unordered_map> devNCCLCommMap_; + + // The NCCL communicators currently in process of being initialized. + std::unordered_map> + inInitializationCommMap_; + + // Mutex to guard maps like devNCCLCommMap_. + std::mutex mutex_; + + // Size of ring buffer where we store NCCL Traces for debugging. + int traceBufferSize_; + + // We gate the cudaEventCache so that we can roll it out gradually. + std::atomic cudaEventCacheEnabled_; + + std::thread onCompletionHookThread_; + + // Whether or not we should terminate the watchdog and workCleanup threads. + std::atomic terminateProcessGroup_; + + // Whether there are hooks pending to be fired + std::atomic hasPendingHooks_; + + // This is the signal from watchdog threads to indicate whether the monitor + // thread should dump. Making it static so that it is accessible from all the + // PGs. With this flag, monitor thread would dump debug info under any one of + // the three conditions: + // + // 1: watchdog thread of any PG detects a collective timeout. + // 2: timeout signal is received from other ranks through tcpstore. + // 3: current PG's watchdog heartbeat timeout occurs. + // + // Note that only the monitor thread from PG0 will dump the debug info for + // case one and two so that the debug info is only dumped once. + static std::atomic shouldDump_; + + // Mutex to Guard workMetaList_ + std::mutex workMetaListMutex_; + + bool writeDebugInfo_ = false; + + // Vector to store WorkNCCL pointers + std::list workMetaList_; + + // Mutex to Guard workMetaList_ + std::mutex completedWorkListMutex_; + + // Condition Variable for watchdog thread sleep + std::condition_variable completedWorkListCV_; + + std::list completedWorkList_; + + // Add Work Pointer to workVector + void workEnqueue(const c10::intrusive_ptr&); + + // The CUDA streams used by NCCL kernels + std::unordered_map ncclStreams_; + + // The CUDA events used to sync NCCL streams + std::unordered_map ncclEvents_; + + // Device Indexes used for all collectives in this group + std::set usedDeviceIdxs_; + + // Flag to denote if a coalescing groupStart/groupEnd block is active + int coalescing_state_ = 0; + + // Stores device indexes for all collectives run inside a coalescing block + at::Device coalescedDevice_ = at::Device("cuda"); + + // Stores communicators for all collectives run inside a coalescing block + std::shared_ptr coalescedComm_ = nullptr; + + // Whether the coalesced calls are sync or async. + bool coalescedAsync_{}; + + // keeps track of input and output tensors when coalescing is in flight. Will + // hand over these tensors to WorkNCCL's stash when coalescing is ended. + TensorShelf coalescedTensors_; + + // Some ops may have completed, but user still hasn't called `work.wait()`. + // When watchdog detects this, it transfers the TensorShelf from `work` to + // this `shelves` structure. Next time we execute ProcessGroupNCCL's methods + // on main thread, we clear the `shelves` in one shot. This is mainly because + // watchdog (a side thread) unstashing the shelf directly seems to cause some + // problem. + std::vector> shelvesToUnstash_; + std::mutex shelvesMutex_; + + // Whether or not wait() and synchronize() are blocking operations that wait + // for the operation to complete. + bool blockingWait_ = false; + + // Whether or not the workCleanupThread is used to perform async error + // handling. + ErrorHandlingMode asyncErrorHandling_ = NoHandling; + + ErrorType error_ = ErrorType::SUCCESS; + + std::mutex errorMutex_; + + // Whether or not to sleep after an exception is thrown in the watchdog. + bool sleepAfterException_{}; + + // Whether or not to enable nan check for input tensors to collectives. + bool enableNanCheck_; + + // Whether or not to create start CUDAEvent and enable timing for start + // and end events. Note that enableTiming_ is always true if desyncDebug_ + // is set to true. + std::atomic enableTiming_; + + // Flag to enable the print of hash value of input/output of collectives for + // verification. + std::atomic enableCollectiveHashDebug_; + + // Whether or not TORCH_NCCL_AVOID_RECORD_STREAMS was set + bool avoidRecordStreams_ = false; + + // The number of active ncclGroupStart() calls. This counter will be increased + // by 1 when ncclGroupStart() is called and decreased by 1 when ncclGroupEnd() + // is called. + static thread_local uint64_t ncclActiveGroupCounter_; + + // Counting for the sequential number of NCCL collective call. + // (specifically, how many actual kernels we launched, which differs from + // op_id_ when coalescing is enabled) + uint64_t seqCollective_{0}; + + // Counting for the sequential number of NCCL P2P calls. + uint64_t seqP2P_{0}; + + // Incrementing counter for logical operations (collective or p2p) issued on + // the ProcessGroup + uint64_t op_id_{0}; + + // The number of ProcessGroupNCCL created on the current rank. + size_t local_id_; + + std::string logPrefix_; + + c10::intrusive_ptr intraNodeComm_; + + // Number of devices on this node. + int localDeviceCount_{0}; + + std::shared_ptr pgStatus_ = + std::make_shared(); + + // Internal cached value: use NCCL non-blocking API mode or not. + // Use `useNonblocking()` method instead of accessing this variable directly. + std::optional useNonblocking_{std::nullopt}; + + // Communication-optimized memory pool associated with this PG + std::unique_ptr memPool_ = nullptr; +}; + +// Reset the flighrecorder recordings for the current rank. +TORCH_API void reset_nccl_trace(); + +// Dumps the NCCL comm traces and additional information about the Process +// Group. +TORCH_API std::string dump_nccl_trace( + bool includeCollectives, + bool includeStackTraces, + bool onlyActive); + +// Dumps the NCCL comm traces and additional information about the Process +// Group in JSON formatted string. +// We don't include stack traces in JSON format as it is far too much data. +TORCH_API std::string dump_nccl_trace_json( + bool includeCollectives, + bool onlyActive); + +// Gets a mutable reference to a global optional function.Heartbeat Monitor +// will use this function to dump traces, if available. Inside fbcode, we +// store a function here that uses an internal tool for process tracing +TORCH_API std::optional< + std::function)>>& +get_cpp_trace_dumper(); + +// Similar to get_cpp_trace_dumper, this stores a function defined in +// torch-python layer that lets us check whether the GIL can be acquired, +// helpful for instrumenting in cases where a hang was observed. +typedef bool (*gil_checker_t)(); + +TORCH_API gil_checker_t& get_gil_checker(); +} // namespace c10d + +#endif // USE_C10D_NCCL + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp new file mode 100644 index 0000000000000000000000000000000000000000..12e737b61df23a0fc5503b60fc7a6136d311aaeb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp @@ -0,0 +1,363 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_UCC + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#ifdef USE_CUDA +#include +#include +#endif + +namespace c10d { + +#define TORCH_UCC_DEVICE_NOT_SET -2 + +#ifdef USE_CUDA +#define SAVE_TENSORS(_TENSORS, _DATA) \ + do { \ + if ((_TENSORS)[0].device().is_cuda()) { \ + for (const auto i : c10::irange((_TENSORS).size())) { \ + c10::cuda::CUDACachingAllocator::recordStream( \ + (_TENSORS)[i].storage().data_ptr(), (*stream)); \ + } \ + } else { \ + (_DATA) = (_TENSORS); \ + } \ + } while (0) + +#else +#define SAVE_TENSORS(_TENSORS, _DATA) (_DATA) = (_TENSORS); +#endif + +constexpr const char* UCC_BACKEND_NAME = "ucc"; + +struct event_pool_t { +#ifdef USE_CUDA + std::queue> event_pool; +#endif + std::mutex event_pool_mutex; +}; + +class Comm; + +// UCC does not support multiple CUDA devices per process. +class TORCH_API ProcessGroupUCC : public Backend { + private: + void set_timeout(ucc_coll_args_t& args); + + public: + class WorkData { + public: + std::vector src; + std::vector dst; + std::vector flat; + WorkData() {} + virtual ~WorkData() = default; + }; + class AlltoallWorkData : public WorkData { + public: + AlltoallWorkData(int size) + : send_lengths(size), + send_offsets(size), + recv_lengths(size), + recv_offsets(size) {} + std::vector send_lengths; + std::vector send_offsets; + std::vector recv_lengths; + std::vector recv_offsets; + }; + + class AllgathervWorkData : public WorkData { + public: + AllgathervWorkData(int size) : recv_lengths(size), recv_offsets(size) {} + std::vector recv_lengths; + std::vector recv_offsets; + }; + + class ScattervWorkData : public WorkData { + public: + ScattervWorkData(int size) : send_lengths(size), send_offsets(size) {} + std::vector send_lengths; + std::vector send_offsets; + }; + + class ProgressEntry { + friend class ProcessGroupUCC; + friend class Comm; + + public: + ProgressEntry(CommBase* comm, ucc_coll_req_h request) + : status_(UCC_INPROGRESS), comm_(comm), request_(request) {} + // Finalizes UCC status or exception of collective request. + void finalize(std::exception_ptr eptr = nullptr); + ucc_status_t status_; + CommBase* comm_; + ucc_coll_req_h request_; + std::unique_ptr data; + c10::intrusive_ptr future_; + std::exception_ptr eptr_; + }; + + class WorkUCC : public Work { + friend class ProcessGroupUCC; + friend class Comm; + + public: + WorkUCC( + OpType opType, + uint64_t seq, + const char* prof_title, + const std::optional>& inputs, + const c10::intrusive_ptr& logger) + : Work(-1, opType, prof_title, inputs), logger_(logger), seq_(seq) {} + ~WorkUCC(); + void setException(); + void setAndThrowException(); + bool isCompleted() override; + bool isSuccess() const override; + bool wait(std::chrono::milliseconds timeout = kUnsetTimeout) override; + c10::intrusive_ptr getFuture() override; + std::vector result() override; + int sourceRank() const override; +#ifdef USE_CUDA + std::unique_ptr fence = nullptr; + event_pool_t* ep = nullptr; +#endif + int sourceRank_; + + protected: + std::shared_ptr entry_; + c10::intrusive_ptr logger_; + uint64_t seq_; + + private: + // The future returned by getFuture. + c10::intrusive_ptr future_; + // Store a reference to collective's outputs, used by result + std::shared_ptr> outputs_; + }; + + explicit ProcessGroupUCC( + const c10::intrusive_ptr& store, + int rank = -1, + int size = -1, + std::chrono::duration timeout = kBackendDefaultTimeout); + + void initComm(c10::Device dev); + + ~ProcessGroupUCC() override; + + const std::string getBackendName() const override { + return std::string(UCC_BACKEND_NAME); + } + +#ifdef USE_CUDA + std::unique_ptr getPooledEvent(); +#endif + + // Performs a health check by initializing dummy UCC & UCX communicators and + // then destroying them. This will help indicate and signal any + // UCC/UCX-related issues prior to the first collective. The actual + // initialization and subsequent destruction is ran on a separate thread and + // the main thread is signalled about timeouts/errors to report to the + // application. + void runHealthCheck(); + + template + c10::intrusive_ptr collective_post( + OpType opType, + PreProcess preproc, + PostProcess postproc, + ucc_coll_args_t& coll, + std::unique_ptr data, + c10::Device dev, + std::vector& inputTensors, + std::vector& outputTensors, + const char* prof_title); + + c10::intrusive_ptr broadcast( + std::vector& data, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + // Counting for the sequential number of UCC collective_post call. + uint64_t seq_{0}; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; + + static c10::intrusive_ptr createProcessGroupUCC( + const c10::intrusive_ptr<::c10d::Store>& store, + int rank, + int size, + const std::chrono::duration& timeout); + + protected: + const std::chrono::duration timeout_; + std::shared_ptr oob; + std::shared_ptr comm = {nullptr}; + uint32_t comm_id; + ucc_team_h team{nullptr}; + ucc_ee_h cuda_ee{nullptr}; + ucc_ee_h cuda_ee_p2p[2]{nullptr, nullptr}; + +#ifdef USE_CUDA + std::unique_ptr stream = nullptr; + std::unique_ptr stream_p2p[2] = {nullptr, nullptr}; + event_pool_t ep; +#endif + c10::intrusive_ptr logger; +}; + +class Comm { + c10::intrusive_ptr logger; + std::shared_ptr oob; + CommUCC ucc_comm; + std::mutex mutex; + std::thread progress_thread; + std::condition_variable queue_produce_cv; + std::condition_variable queue_consume_cv; + std::deque> progress_queue; + bool stop_progress_loop; + bool collective_inprogress; + torch_ucc_phase_t finalize_phase; + + public: + c10::DeviceIndex cuda_device_index; + Comm( + const c10::intrusive_ptr& logger, + std::shared_ptr oob, + c10::Device dev, + bool is_health_check); + + ~Comm(); + + void ucc_create_team( + ucc_team_h& team, + std::shared_ptr oob); + + void ucc_destroy_team(ucc_team_h& team); + + c10::intrusive_ptr enqueue_p2p( + OpType opType, + ucc_coll_req_h request, + const char* prof_title); + +#ifdef USE_CUDA + void enqueue_cuda_collective( + std::unique_ptr data, + c10::intrusive_ptr work, + ucc_coll_args_t& coll, + ucc_team_h team, + ucc_ee_h ee); +#endif + + void enqueue_collective( + std::unique_ptr data, + c10::intrusive_ptr work, + ucc_coll_args_t& coll, + ucc_team_h team); + + static std::shared_ptr get_comm( + uint32_t& id, + c10::Device dev, + std::shared_ptr oob, + const c10::intrusive_ptr& logger, + bool is_health_check = false); + + void progress_loop(); +}; + +} // namespace c10d + +#endif // USE_C10D_UCC + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..29c66431b1aba7bd74be896f4bf4d070c1f59d26 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp @@ -0,0 +1,145 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include +#include +#include + +namespace c10d { + +class TORCH_API ProcessGroupWrapper : public Backend { + public: + explicit ProcessGroupWrapper( + const c10::intrusive_ptr& backend, + c10::intrusive_ptr glooBackend); + + const std::string getBackendName() const override; + + c10::intrusive_ptr broadcast( + std::vector& data, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& data, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + // This function is deprecated and will be moved out of ProcessGroup to comms: + // * do not add dependencies on this function, + // * do not implement it in your ProcessGroup, implement _allgather_base + // instead. + c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + void monitoredBarrier(const BarrierOptions& opts, bool waitAllRanks = false) + override; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. Only implemented + // for GLOO and NCCL backends currently. + // dont implement this + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; // just call underlying + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + c10::intrusive_ptr recvAnysource( + std::vector& tensors, + int tag) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const ReduceScatterOptions& opts) override; + + void startCoalescing() override; + + c10::intrusive_ptr endCoalescing() override; + + c10::intrusive_ptr getWrappedPg() const; + + private: + // Underlying process group that actual application collectives will be + // dispatched to + c10::intrusive_ptr backend_; + // Gloo process group responsible for internal coordination such as monitored + // barrier, sequence number checking, collective fingerprint collecting. + c10::intrusive_ptr glooBackend_; + // Conducts several checks to ensure that the underlying collective is well + // formed with the goal of notifying the user about incorrect collective use + // in the application. + void runCollectiveChecks( + OpType op_type, + const std::vector& tensors); +}; +} // namespace c10d + +#endif // USE_C10D_GLOO + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PyProcessGroup.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PyProcessGroup.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dd974d19037a5e7ae362dd5c4218c4bfcf4ea34f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PyProcessGroup.hpp @@ -0,0 +1,361 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace c10d { + +// PyProcessGroup is a pybind11 trampoline class to allow a Python +// class to inherit from torch.distributed.ProcessGroup +class PyProcessGroup : public ProcessGroup { + public: + // PyWork is a pybind11 trampoline class to allow a Python + // class to inherit from torch.distributed.Work + class TORCH_PYTHON_API PyWork : public Work { + public: + PyWork() = default; + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + PYBIND11_OVERRIDE( + bool, /* Return type */ + Work, /* Parent class */ + wait, /* Name of function in C++ */ + timeout); + } + + c10::intrusive_ptr getFuture() override { + // We cannot use PYBIND11_OVERRIDE because: + // 1. We have to >MANUALLY< unwrap the PyFutureWrapper and + // 2. The python name is get_future + pybind11::gil_scoped_acquire gil; + auto override = + pybind11::get_override(static_cast(this), "get_future"); + + if (override) { + py::object o = override(); + auto futWrapper = + o.cast>(); + return futWrapper->fut; + } + + return Work::getFuture(); + } + }; + +#define WORK_OVERRIDE(cname, name, ...) \ + do { \ + pybind11::gil_scoped_acquire gil; \ + pybind11::function override = \ + pybind11::get_override(static_cast(this), #name); \ + if (override) { \ + auto o = override(__VA_ARGS__); \ + return c10::make_intrusive(o); \ + } \ + return cname::name(__VA_ARGS__); \ + } while (false) + + // This class is used to wrap a PyWork trampoline with it's corresponding + // Python object to prevent the Python object from being garbage collected. + class PyWorkHolder : public Work { + public: + PyWorkHolder(const c10::intrusive_ptr& work, py::object pyWork) + : work_(work), pyWork_(std::move(pyWork)) {} + + PyWorkHolder(py::object pyWork) + : work_(pyWork.cast>()), + pyWork_(std::move(pyWork)) {} + + ~PyWorkHolder() override { + // GIL must be held when freeing python objects. + py::gil_scoped_acquire gil; + pyWork_ = py::object(); + } + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + return work_->wait(timeout); + } + + c10::intrusive_ptr getFuture() override { + return work_->getFuture(); + } + + private: + c10::intrusive_ptr work_; + py::object pyWork_; + }; + + using ProcessGroup::ProcessGroup; + + const std::string getBackendName() const override { + PYBIND11_OVERRIDE( + std::string, /* Return type */ + ProcessGroup, /* Parent class */ + getBackendName, /* Name of function in C++ */ + ); + } + + int getRank() const override { + PYBIND11_OVERRIDE( + int, /* Return type */ + ProcessGroup, /* Parent class */ + getRank, /* Name of function in C++ */ + ); + } + + int getSize() const override { + PYBIND11_OVERRIDE( + int, /* Return type */ + ProcessGroup, /* Parent class */ + getSize, /* Name of function in C++ */ + ); + } + + void abort() override { + PYBIND11_OVERRIDE( + void, /* Return type */ + ProcessGroup, /* Parent class */ + abort, /* Name of function in C++ */ + ); + } + + const std::string& getGroupName() const override { + PYBIND11_OVERRIDE( + const std::string&, /* Return type */ + ProcessGroup, /* Parent class */ + getGroupName, /* Name of function in C++ */ + ); + } + + void setGroupName(const std::string& group_name) override { + PYBIND11_OVERRIDE( + void, /* Return type */ + ProcessGroup, /* Parent class */ + setGroupName, /* Name of function in C++ */ + group_name); + } + + const std::string& getGroupDesc() const override { + PYBIND11_OVERRIDE( + const std::string&, /* Return type */ + ProcessGroup, /* Parent class */ + getGroupDesc, /* Name of function in C++ */ + ); + } + + void setGroupDesc(const std::string& group_desc) override { + PYBIND11_OVERRIDE( + void, /* Return type */ + ProcessGroup, /* Parent class */ + setGroupDesc, /* Name of function in C++ */ + group_desc); + } + + c10::intrusive_ptr splitGroup( + const std::vector& ranks, + const std::optional& timeout, + const std::optional>& opts, + const std::optional& group_name, + const std::optional& group_desc) override { + PYBIND11_OVERRIDE( + c10::intrusive_ptr, /* Return type */ + ProcessGroup, /* Parent class */ + splitGroup, /* Name of function in C++ */ + ranks, + timeout, + opts, + group_name, + group_desc); + } + + c10::intrusive_ptr mergeRemoteGroup( + const c10::intrusive_ptr& store, + const MergeOptions& opts, + const int& size) override { + PYBIND11_OVERRIDE( + c10::intrusive_ptr, /* Return type */ + ProcessGroup, /* Parent class */ + mergeRemoteGroup, /* Name of function in C++ */ + store, + opts, + size); + } + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + allgather, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + allgather_into_tensor_coalesced, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override { + WORK_OVERRIDE( + // py::object, /* Return type */ + ProcessGroup, /* Parent class */ + allreduce, /* Name of function in C++ */ + tensors, + opts); + } + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + allreduce_coalesced, /* Name of function in C++ */ + tensors, + opts); + } + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + alltoall_base, /* Name of function in C++ */ + outputBuffer, + inputBuffer, + outputSplitSizes, + inputSplitSizes, + opts); + } + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + barrier, /* Name of function in C++ */ + opts); + } + + c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + broadcast, /* Name of function in C++ */ + tensors, + opts); + } + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + reduce_scatter, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + reduce_scatter_tensor_coalesced, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + send, /* Name of function in C++ */ + tensors, + dstRank, + tag); + } + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + recv, /* Name of function in C++ */ + tensors, + srcRank, + tag); + } +}; + +class TORCH_PYTHON_API PythonOnCompletionHook { + public: + // Wraps a py::object hook and acquires Python GIL in dtor before + // destructing the hook object. + PythonOnCompletionHook(py::object hook) : hook_(std::move(hook)) {} + PythonOnCompletionHook(const PythonOnCompletionHook&) = default; + + // NOLINTNEXTLINE(bugprone-exception-escape) + ~PythonOnCompletionHook() { + py::gil_scoped_acquire ag; + hook_.dec_ref(); + // Explicitly set hook_ to nullptr to prevent py::object's dtor + // to decref on the PyObject again. + // See Note [Destructing py::object] in python_ivalue.h + hook_.ptr() = nullptr; + } + + void operator()(const std::shared_ptr& workInfo) const { + std::exception_ptr eptr; + { + py::gil_scoped_acquire acquire; + try { + hook_(workInfo); + } catch (py::error_already_set& e) { + // py::error_already_set requires GIL to destruct, take + // special care. + eptr = std::make_exception_ptr(std::runtime_error(e.what())); + e.restore(); + PyErr_Clear(); + } catch (std::exception&) { + eptr = std::current_exception(); + } + } + // No more Python-related stuff at this point, i.e., this + // exception can be captured and handled by PG backend. + if (eptr) + std::rethrow_exception(eptr); + } + + private: + py::object hook_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/RankLocal.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/RankLocal.hpp new file mode 100644 index 0000000000000000000000000000000000000000..de3e5e2f95ff92bec1e9bbc6c457c777cb4f578d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/RankLocal.hpp @@ -0,0 +1,78 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) + +#pragma once + +#include + +#include + +namespace c10d { + +// `RankLocal` maintains a unique instance of T for each non-autograd thread. +// For non-autograd threads, `RankLocal::get()` functions similar to +// thread_local. For autograd threads, `RankLocal::get()` returns the +// instance of T corresponding to the enqueuing non-autograd thread. The +// mechanism allows for rank-specific context shared between forward and +// backward. It works for both the one-rank-per-process and one-rank-per-thread +// scenarios. +// +// NOTE: RankLocal doesn't make the underlying objects thread-safe. +template +class RankLocal { + public: + RankLocal(const RankLocal&) = delete; + RankLocal& operator=(const RankLocal&) = delete; + + static T& get() { + // Fast path: non-autograd threads can simply return + // the object reference cached in TLS. + if (cached_ != nullptr) { + return *cached_; + } + const auto node = torch::autograd::get_current_node(); + auto fwd_thread_id = node == nullptr ? at::RecordFunction::currentThreadId() + : node->thread_id(); + // Optimistically acquire the read lock first, since most likely we are in + // an autograd thread and the object has already been constructed. + { + std::shared_lock read_lock(lock_); + auto it = thread_id_to_rank_local_.find(fwd_thread_id); + if (it != thread_id_to_rank_local_.end()) { + // Cache for non-autograd threads + if (node == nullptr) { + cached_ = &it->second; + } + return it->second; + } + } + + std::unique_lock write_lock(lock_); + auto [it, _] = thread_id_to_rank_local_.try_emplace(fwd_thread_id); + // Cache for non-autograd threads + if (node == nullptr) { + cached_ = &it->second; + } + return it->second; + } + + private: + RankLocal() = default; + thread_local static T* cached_; + static std::unordered_map thread_id_to_rank_local_; + static std::shared_mutex lock_; +}; + +template +thread_local T* RankLocal::cached_ = nullptr; + +template +std::unordered_map RankLocal::thread_id_to_rank_local_; + +template +std::shared_mutex RankLocal::lock_; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Store.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Store.hpp new file mode 100644 index 0000000000000000000000000000000000000000..641bc6a36aebde20dfd33309da1609a55e27dea6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Store.hpp @@ -0,0 +1,159 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace c10d { + +// callback function will be given arguments (std::optional oldValue, +// std::optional newValue) +using WatchKeyCallback = + std::function, std::optional)>; + +class TORCH_API Store : public torch::CustomClassHolder { + public: + static constexpr std::chrono::milliseconds kDefaultTimeout = + std::chrono::seconds(300); + static constexpr std::chrono::milliseconds kNoTimeout = + std::chrono::milliseconds::zero(); + + Store() : timeout_(kDefaultTimeout) {} + + explicit Store(const std::chrono::milliseconds& timeout) + : timeout_(timeout) {} + + Store(const Store&) = default; + Store(Store&&) noexcept = default; + + ~Store() override = default; + + // Clone a thread safe copy of this store object that points to the same + // underlying store. + virtual c10::intrusive_ptr clone() = 0; + + void set(const std::string& key, const std::string& value); + + virtual void set( + const std::string& key, + const std::vector& value) = 0; + + std::string compareSet( + const std::string& key, + const std::string& currentValue, + const std::string& newValue); + + virtual std::vector compareSet( + const std::string& key, + const std::vector& currentValue, + const std::vector& newValue) { + C10_THROW_ERROR(NotImplementedError, "Not implemented."); + } + + std::string get_to_str(const std::string& key); + + virtual std::vector get(const std::string& key) = 0; + + virtual int64_t add(const std::string& key, int64_t value) = 0; + + virtual bool deleteKey(const std::string& key) = 0; + + virtual bool check(const std::vector& keys) = 0; + + virtual int64_t getNumKeys() = 0; + + virtual void wait(const std::vector& keys) = 0; + + virtual void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) = 0; + + virtual const std::chrono::milliseconds& getTimeout() const noexcept; + + virtual void setTimeout(const std::chrono::milliseconds& timeout); + + // watchKey() is deprecated and no longer supported. + virtual void watchKey( + const std::string& /* unused */, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + WatchKeyCallback /* unused */) { + C10_THROW_ERROR( + NotImplementedError, + "watchKey is deprecated, no implementation support it."); + } + + virtual void append( + const std::string& key, + const std::vector& value); + + virtual std::vector> multiGet( + const std::vector& keys); + + virtual void multiSet( + const std::vector& keys, + const std::vector>& values); + + // Returns true if this store support append, multiGet and multiSet + virtual bool hasExtendedApi() const; + + virtual void queuePush( + const std::string& key, + const std::vector& value) { + C10_THROW_ERROR(NotImplementedError, "queue support is not implemented."); + } + + virtual std::vector queuePop(const std::string& key, bool block) { + C10_THROW_ERROR(NotImplementedError, "queue support is not implemented."); + } + + virtual int64_t queueLen(const std::string& key) { + C10_THROW_ERROR(NotImplementedError, "queue support is not implemented."); + } + + virtual std::vector listKeys() { + C10_THROW_ERROR( + NotImplementedError, "listKeys support is not implemented."); + } + + protected: + std::chrono::milliseconds timeout_; +}; + +/* +StoreTimeoutGuard is a RAII guard that will set the store timeout and restore it +when it returns. +*/ +class StoreTimeoutGuard { + public: + explicit StoreTimeoutGuard( + Store& store, + const std::chrono::milliseconds& timeout) + : store_(store), oldTimeout_(store.getTimeout()) { + store.setTimeout(timeout); + } + + ~StoreTimeoutGuard() { + store_.setTimeout(oldTimeout_); + } + + /* Disabling copy and move semantics */ + StoreTimeoutGuard(const StoreTimeoutGuard&) = delete; + StoreTimeoutGuard& operator=(const StoreTimeoutGuard&) = delete; + StoreTimeoutGuard(StoreTimeoutGuard&&) = delete; + StoreTimeoutGuard& operator=(StoreTimeoutGuard&&) = delete; + + private: + Store& store_; + std::chrono::milliseconds oldTimeout_{}; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStore.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ea3272d25b465ae53a8b73f8b440253892a92e41 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStore.hpp @@ -0,0 +1,176 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace c10d { +namespace detail { + +// TCPStore is a key-value store used by PyTorch mainly for distributed +// rendezvous, but for other purposes as well. (e.g., a centralized storage for +// synchronization among different processes.) +// +// It is run via a classic client-server architecture, where the server runs +// a separate background thread (alternatively we call it daemon thread). The +// client and server communicate via TCP sockets. +// +// Currently we have two types of server backends: +// 1. TCPStoreBackend: a single thread to handle all incoming request +// synchronously. +// 2. LibUVTCPStoreBackend: an event-driven asynchronous stream processing that +// leverages libuv library (https://github.com/libuv/libuv) for better +// performance. And this backend now is recommended to users. (We set the +// default value of `useLibUV` inside `TCPStoreOptions` to true now, so users +// should get it by default). +// +// Code structure: +// ├── TCPStore client side API and server setup code: +// │ TCPStore.hpp/TCPStore.cpp +// ├── TCPStoreBackend server side API implementation code: +// │ TCPStoreBackend.hpp/TCPStoreBackend.cpp +// | (actual class:`TCPStoreMasterDaemon`) +// ├── LibUVTCPStoreBackend +// │ TCPStoreLibUvBackend.cpp +// | (actual class: `LibUVStoreDaemon`) + +class TCPServer; + +class TCPClient; + +struct SocketAddress { + std::string host; + std::uint16_t port{}; +}; + +} // namespace detail + +struct TCPStoreOptions { + static constexpr std::uint16_t kDefaultPort = 29500; + + std::uint16_t port = kDefaultPort; + bool isServer = false; + std::optional numWorkers = std::nullopt; + bool waitWorkers = true; + std::chrono::milliseconds timeout = Store::kDefaultTimeout; + + // A boolean value indicating whether multiple store instances can be + // initialized with the same host:port pair. + bool multiTenant = false; + + // If specified, and if isServer is true, the underlying TCPServer will take + // over the bound socket associated to this fd. This option is useful to avoid + // port assignment races in certain scenarios. + std::optional masterListenFd = std::nullopt; + + // A boolean value indicating whether to use the experimental libUV backend. + bool useLibUV = true; +}; + +class TORCH_API TCPStore : public Store { + public: + static constexpr std::chrono::milliseconds kConnectRetryDelay{1000}; + + explicit TCPStore(std::string host, const TCPStoreOptions& opts = {}); + + ~TCPStore() override; + + c10::intrusive_ptr clone() override; + + void set(const std::string& key, const std::vector& value) override; + + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + int64_t add(const std::string& key, int64_t value) override; + + bool deleteKey(const std::string& key) override; + + bool check(const std::vector& keys) override; + + int64_t getNumKeys() override; + + void wait(const std::vector& keys) override; + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + void append(const std::string& key, const std::vector& value) + override; + + std::vector> multiGet( + const std::vector& keys) override; + + void multiSet( + const std::vector& keys, + const std::vector>& values) override; + + bool hasExtendedApi() const override; + + void queuePush(const std::string& key, const std::vector& value) + override; + + std::vector queuePop(const std::string& key, bool block) override; + + int64_t queueLen(const std::string& key) override; + + std::vector listKeys() override; + + // Waits for all workers to join. + void waitForWorkers(); + + // Returns the hostname used by the TCPStore. + const std::string& getHost() const noexcept { + return addr_.host; + } + + // Returns the port used by the TCPStore. + std::uint16_t getPort() const noexcept { + return addr_.port; + } + + bool isLibUvBackend() const noexcept { + return usingLibUv_; + } + + // note(xilunwu): this function is only for internal testing + void _splitSet(const std::string& key, const std::vector& data); + + std::string repr() const; + + private: + int64_t incrementValueBy(const std::string& key, int64_t delta); + + void ping(); + void validate(); + + std::vector doGet(const std::string& key); + + void doWait( + c10::ArrayRef keys, + std::chrono::milliseconds timeout); + + detail::SocketAddress addr_; + std::shared_ptr server_; + std::unique_ptr client_; + std::optional numWorkers_; + + const std::string initKey_ = "init/"; + const std::string keyPrefix_ = "/"; + std::mutex activeOpLock_; + bool usingLibUv_ = true; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStoreBackend.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStoreBackend.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bed050e1cb26c70b5955a5d29f521be86bf6194a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStoreBackend.hpp @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#endif + +namespace c10d::detail { + +// Magic number for client validation. +static const uint32_t validationMagicNumber = 0x3C85F7CE; + +enum class QueryType : uint8_t { + VALIDATE, + SET, + COMPARE_SET, + GET, + ADD, + CHECK, + WAIT, + GETNUMKEYS, + DELETE_KEY, + APPEND, + MULTI_GET, + MULTI_SET, + CANCEL_WAIT, + PING, + QUEUE_PUSH, + QUEUE_POP, + QUEUE_LEN, + LIST_KEYS, +}; + +enum class CheckResponseType : uint8_t { READY, NOT_READY }; + +enum class WaitResponseType : uint8_t { STOP_WAITING, WAIT_CANCELED }; + +// Abstract base class to handle thread state for TCPStoreMasterDaemon. +// Contains the windows/unix implementations to signal a +// shutdown sequence for the thread +class BackgroundThread { + public: + explicit BackgroundThread(); + + virtual ~BackgroundThread() = 0; + virtual std::uint16_t port() const = 0; + + void start(); + bool stop_requested(); + + protected: + void dispose(); + virtual void run() = 0; + virtual void stop() = 0; + bool is_running() { + return is_running_.load(); + } + + private: + std::atomic is_running_{false}; + std::thread daemonThread_; +}; + +std::unique_ptr create_tcpstore_backend( + const TCPStoreOptions& opts); +std::unique_ptr create_libuv_tcpstore_backend( + const TCPStoreOptions& opts); +bool is_libuv_tcpstore_backend_available(); + +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TraceUtils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TraceUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..8e584ac92c5c2a8aaf092cd25fc273f1fe1740f2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TraceUtils.h @@ -0,0 +1,324 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include // optional, for ostream fallback +#include // for fmt::join + +#include +#include +#include +#include +#include +#include + +namespace c10d { + +inline std::string getTraceStartKey(const std::string& pgName, int rank) { + return fmt::format(FMT_COMPILE("{}_{}_trace_start"), pgName, rank); +} + +inline std::string getTraceEndKey(const std::string& pgName, int rank) { + return fmt::format(FMT_COMPILE("{}_{}_trace_end"), pgName, rank); +} + +inline bool traceUpdate( + c10::intrusive_ptr& store, + const std::string& key, + uint64_t seq, + const std::string& col) { + std::vector value(col.size() + sizeof(seq) + 1); + std::memcpy(value.data(), &seq, sizeof(seq)); + std::memcpy(value.data() + sizeof(seq), col.data(), col.size()); + try { + store->set(key, value); + return true; + } catch (...) { + LOG(ERROR) << "Store is down while updating #" << seq << " with key " + << key; + return false; + } + return true; +} + +enum TraceDebugEvent { + kEventStart, + kEventEnd, +}; +// >> +using TraceMap = + std::map>>; + +inline std::string ranksToString(const std::vector& ranks) { + return fmt::to_string(fmt::join(ranks, ", ")); +} + +inline std::string ranksFromTrace( + const std::vector>& items) { + fmt::memory_buffer buf; + bool first = true; + for (const auto& [rank, _] : items) { + if (!first) { + fmt::format_to(std::back_inserter(buf), ", "); + } + fmt::format_to(std::back_inserter(buf), "{}", rank); + first = false; + } + return fmt::to_string(buf); +} + +inline std::string analyzeMissingRanks(const std::vector& missingRanks) { + return c10::str( + "\n\t - To our best knowledge, ranks [", + ranksToString(missingRanks), + "] are the lagging ranks that caused this timeout. " + "They never joined any collectives"); +} + +inline std::string analyzeLaggingRanks(const TraceMap& traceMap) { + uint64_t lagSeq = traceMap.begin()->first; + std::vector startRanks; + std::vector endRanks; + for (auto& p : traceMap.begin()->second) { + if (p.second.second == kEventStart) { + startRanks.push_back(p.first); + } else { + endRanks.push_back(p.first); + } + } + std::string report = + "\n\t - To our best knowledge, the lagging/dead/mismatched ranks " + "that caused the desync are:"; + if (!startRanks.empty()) { + report += c10::str( + "\n\t - [", + ranksToString(startRanks), + "] joined but didn't finish collective #", + lagSeq, + " (count from 1)"); + } + if (!endRanks.empty()) { + report += c10::str( + "\n\t [", + ranksToString(endRanks), + "] finished collective #", + lagSeq, + ", but didn't join collective #", + lagSeq + 1, + " (count from 1)"); + } + return report; +} + +inline std::string dumpSnapshot(TraceMap& traceMap) { + std::string report = "\n\t - Snapshot of ranks' latest states:"; + for (auto& tracePair : traceMap) { + uint64_t seq = tracePair.first; + std::map>& subMap = + tracePair.second; + + std::unordered_map> collectivesStart; + std::unordered_map> collectivesEnd; + for (const auto& p : subMap) { + int rank = p.first; + const std::string& col = p.second.first; + if (p.second.second == kEventStart) { + collectivesStart[col].push_back(rank); + } else { + collectivesEnd[col].push_back(rank); + } + } + + if (!collectivesStart.empty()) { + report += c10::str("\n\t #", seq, " started ranks:"); + for (auto& mapPair : collectivesStart) { + report += c10::str( + "\n\t [", + ranksToString(mapPair.second), + "] started ", + mapPair.first); + } + } + if (!collectivesEnd.empty()) { + report += c10::str("\n\t #", seq, " finished ranks:"); + for (auto& mapPair : collectivesEnd) { + report += c10::str( + "\n\t [", + ranksToString(mapPair.second), + "] finished ", + mapPair.first); + } + } + } + return report; +} + +inline bool parseTraceValue( + c10::intrusive_ptr& store, + const std::string& key, + uint64_t& seq, + std::string& col) { + try { + std::vector traceValue = store->get(key); + std::memcpy(&seq, traceValue.data(), sizeof(seq)); + std::string colName((char*)traceValue.data() + sizeof(seq)); + col = colName; + return true; + } catch (...) { + LOG(ERROR) << "Store is down while getting key " << key; + return false; + } + return true; +} + +inline std::string retrieveDesyncReport( + c10::intrusive_ptr& store, + const std::string& pgName, + int myRank, + int worldSize) { + std::string report; + + uint64_t thisSeq = 0; + std::string thisCol; + + std::vector missingRanks; + TraceMap traceMap; + + for (const auto rank : c10::irange(worldSize)) { + // Build traceMapStart. + uint64_t seqStart = 0; + { + std::string traceKeyStart = getTraceStartKey(pgName, rank); + if (!store->check({traceKeyStart})) { + missingRanks.push_back(rank); + continue; + } + std::string col; + if (!parseTraceValue(store, traceKeyStart, seqStart, col)) { + return report; + } + traceMap[seqStart].emplace(rank, std::make_pair(col, kEventStart)); + if (rank == myRank) { + thisSeq = seqStart; + thisCol = std::move(col); + } + } + + // Build traceMapEnd. + { + std::string traceKeyEnd = getTraceEndKey(pgName, rank); + if (!store->check({traceKeyEnd})) { + continue; + } + uint64_t seq = 0; + std::string col; + if (!parseTraceValue(store, traceKeyEnd, seq, col)) { + return report; + } + if (seq == seqStart) { + traceMap[seq][rank].second = kEventEnd; + } + } + } + + TORCH_INTERNAL_ASSERT( + !missingRanks.empty() || !traceMap.empty(), + "Trace shouldn't be empty while enabled GLOO_ASYNC_TIMEOUT_DEBUG"); + TORCH_INTERNAL_ASSERT( + !thisCol.empty(), + "Timeout rank [", + myRank, + "] must have collective tracking iteam in c10::Store trace"); + TORCH_INTERNAL_ASSERT( + traceMap[thisSeq][myRank].second == kEventStart, + "Timeout rank [", + myRank, + "] last trace item must be kEventStart. thisSeq = ", + thisSeq, + ", col = ", + thisCol); + + report += c10::str( + "\n\t - [", myRank, "] Timeout at collective: ", thisCol, ", #", thisSeq); + + if (!missingRanks.empty()) { + report += analyzeMissingRanks(missingRanks); + } else { + report += analyzeLaggingRanks(traceMap); + report += dumpSnapshot(traceMap); + } + + return report; +} + +inline std::string pickle_str(const c10::IValue& v) { + std::vector result; + { + auto writer = [&](const char* data, size_t size) { + result.insert(result.end(), data, data + size); + }; + torch::jit::Pickler pickler( + writer, nullptr, nullptr, nullptr, nullptr, false); + pickler.protocol(); + pickler.pushIValue(v); + pickler.stop(); + } + return std::string(result.begin(), result.end()); +} + +inline std::string get_python_cpp_trace() { + // usage: + // LOG(INFO) << "stacktrace: " + // << get_python_cpp_trace(); + // warn: might be slow in getting cpp traces + // because of slow/broken addr2line + // in different system libs + std::shared_ptr tb = + torch::CapturedTraceback::gather( + /*python=*/true, /*script=*/true, /*cpp=*/true); + torch::SymbolizedTracebacks s_tbs = torch::symbolize({tb.get()}); + const auto& s_tb = s_tbs.tracebacks.at(0); + constexpr auto TB_FMT_CSTR = FMT_COMPILE("#{} {} from {}:{}\n"); + fmt::memory_buffer buf; + auto buf_iter = std::back_inserter(buf); + for (auto idx : c10::irange(s_tb.size())) { + auto frame_id = s_tb[idx]; + const auto& frame = s_tbs.all_frames.at(frame_id); + fmt::format_to( + buf_iter, + TB_FMT_CSTR, + idx, + frame.funcname, + frame.filename, + frame.lineno); + } + return fmt::to_string(buf); +} + +inline c10::Dict new_dict() { + return c10::Dict( + c10::AnyType::get(), c10::AnyType::get()); +} + +inline c10::List new_list() { + return c10::List(c10::AnyType::get()); +} + +inline std::string ranks_str(const std::vector& ranks) { + return fmt::format("[{}]", fmt::join(ranks, ", ")); +} + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Types.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Types.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c02edd2ef0eb6f4a04b6abe2976d1c9cd20e2e90 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Types.hpp @@ -0,0 +1,190 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include +#include + +#include +#include + +namespace c10d { + +// Base class for supplementary data potentially needed by ReduceOps +struct TORCH_API _SupplementBase : torch::CustomClassHolder { + ~_SupplementBase() override = default; +}; + +// Supplementary data specific to NCCL PREMUL_SUM +// The point of use in ProcessGroupNCCL knows how to unpack it. +struct NCCLPreMulSumSupplement : _SupplementBase { + double double_factor{0.0}; + at::Tensor tensor_factor; + NCCLPreMulSumSupplement(double f) : double_factor{f} {} + NCCLPreMulSumSupplement(at::Tensor t) : tensor_factor{std::move(t)} { + TORCH_CHECK_EQ(tensor_factor.numel(), 1); + } +}; + +// Other ReduceOps that need different supplementary data can also +// derive from _SupplementBase. +struct TORCH_API ReduceOp : torch::CustomClassHolder { + // note(crcrpar): RedOpType could be defined outside of `ReduceOp` + enum RedOpType : uint8_t { + SUM = 0, + AVG = 1, + PRODUCT = 2, + MIN = 3, + MAX = 4, + BAND = 5, // Bitwise AND + BOR = 6, // Bitwise OR + BXOR = 7, // Bitwise XOR + PREMUL_SUM = 8, // Multiply by a user-supplied constant before summing. + UNUSED = 9 + }; + + ReduceOp() = default; + + ReduceOp(RedOpType op) : op_(op) { + TORCH_INTERNAL_ASSERT( + op_ != PREMUL_SUM, + "Use `torch.distributed._make_nccl_premul_sum` to create an instance of ReduceOp with PREMUL_SUM"); + } + + ReduceOp( + RedOpType op, + const c10::intrusive_ptr<_SupplementBase>& optional_supplement) { + if (optional_supplement) { + op_ = op; + } else { + supplement_ = optional_supplement; + } + } + + // The heap resource supplement_, if it exists, is managed by a + // c10::intrusive_ptr, so constructors and operator= can be simple + ReduceOp(const ReduceOp& other) = default; + ReduceOp& operator=(const ReduceOp& other) = default; + + ReduceOp(ReduceOp&& other) = default; + ReduceOp& operator=(ReduceOp&& other) = default; + ~ReduceOp() override = default; + + operator RedOpType() const { + return op_; + } + + bool operator==(const std::uint8_t other) { + TORCH_INTERNAL_ASSERT(other < 9, "Invalid other op value"); + return other == op_; + } + + bool operator==(const ReduceOp::RedOpType other) { + return *this == static_cast(other); + } + + // todo(crcrpar): Handle `RedOpType::PREMUL_SUM` with its scaling factor. + bool operator==(const ReduceOp& other) { + return *this == other.op_; + } + + RedOpType op_ = SUM; + // supplement_ is "type-erased" storage for optional supplementary + // data the op might need. + // The point of use will know the derived type supplement_ really is, + // and downcast its pointer to extract the data as the needed type(s). + // Right now, only PREMUL_SUM needs supplementary data, but the same + // mechanism could extend to support other nontrivial reduce ops with + // different supplementary payloads. + c10::intrusive_ptr<_SupplementBase> supplement_; +}; + +template +ReduceOp makeNCCLPreMulSum(const T& factor) { + ReduceOp rop; + rop.op_ = ReduceOp::PREMUL_SUM; + rop.supplement_ = c10::make_intrusive(factor); + return rop; +} + +TORCH_API bool isComplexViewAsRealAllowed(const ReduceOp& reduceOp); + +constexpr auto kUnsetTimeout = std::chrono::milliseconds(-1); + +struct BroadcastOptions { + int64_t rootRank = 0; + int64_t rootTensor = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct AllreduceOptions { + ReduceOp reduceOp = ReduceOp::SUM; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; + std::optional sparseIndices = std::nullopt; +}; + +struct AllreduceCoalescedOptions : AllreduceOptions {}; + +struct ReduceOptions { + ReduceOp reduceOp = ReduceOp::SUM; + int64_t rootRank = 0; + int64_t rootTensor = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct AllgatherOptions { + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct GatherOptions { + int64_t rootRank = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct ScatterOptions { + int64_t rootRank = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct ReduceScatterOptions { + ReduceOp reduceOp = ReduceOp::SUM; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct AllToAllOptions { + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct BarrierOptions { + std::vector device_ids; + std::chrono::milliseconds timeout = kUnsetTimeout; + std::optional device; + bool asyncOp = true; +}; + +struct DistributedBackendOptions { + c10::intrusive_ptr<::c10d::Store> store; + int group_rank; + int group_size; + std::chrono::duration timeout; + std::string group_id; + std::vector global_ranks_in_group; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCTracing.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCTracing.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6e53aa7a21fb3a67135d15b2add0fe344d05dab6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCTracing.hpp @@ -0,0 +1,63 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_UCC + +#include + +namespace c10d { + +#define RECORD_COMMS_TRACE( \ + _comms_tracer, _work, _opType, _rank, _comm_size, _inTensors, _outTensors) \ + do { \ + if (torch_ucc_config.enable_comms_logger) { \ + _comms_tracer->recordComms( \ + opTypeToString(_opType), \ + (uintptr_t)_work.get(), \ + _rank, \ + _comm_size, \ + _inTensors, \ + _outTensors); \ + } \ + } while (0) + +// interfaces to collect communication traces +class TORCH_API CommTraceLogger : public torch::CustomClassHolder { + private: + std::vector comms_trace_; + std::vector curBlocks_; /* unused */ + std::vector curOutSplitSizes_; + std::vector curInSplitSizes_; + int curRoot_ = -1; + unsigned long seqnum = 0; + + public: + void setCurBlock(const std::string& name); /* unused */ + void popBlock(); /* unused */ + // record root info if applicable, e.g., broadcast, gather, scatter + void recordOptionalInfo(int root = -1); + // record input/output splits of Alltoallv + void recordOptionalInfo( + const std::vector& outputSplitSizes = {}, + const std::vector& inputSplitSizes = {}); + // record essential comms information + void recordComms( + const std::string& collName, + const uintptr_t workReq = 0, + const int rank = -1, + const int world_size = -1, + const std::vector& inputTensors = {}, + const std::vector& outputTensor = {}); + // return collected comms traces + std::vector& getCommsTrace() { + return comms_trace_; + } +}; + +} // namespace c10d + +#endif // USE_C10D_UCC + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCUtils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..66357622a880f36d1cad25d5a1642478482bcf05 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCUtils.hpp @@ -0,0 +1,192 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_UCC + +#include +#include +#include + +namespace c10d { + +// Macro to generate the error message on a non-successful UCC return value. +#define TORCH_UCC_GET_ERROR_MSG(_err, _error_msg, _result) \ + do { \ + _err = c10::str( \ + "[", \ + std::string(__FILE__), \ + ":", \ + std::to_string(__LINE__), \ + "] ", \ + logger->getLogPrefix(), \ + _error_msg, \ + ", error code ", \ + _result, \ + ": ", \ + ucc_status_string(_result), \ + ", system error code ", \ + errno); \ + } while (0) + +// Macro to throw on a non-successful UCC return value. +#define TORCH_UCC_CHECK(_cmd, _error_msg) \ + do { \ + ucc_status_t result = _cmd; \ + if (result != UCC_OK) { \ + std::string err; \ + TORCH_UCC_GET_ERROR_MSG(err, _error_msg, result); \ + TORCH_CHECK(false, err); \ + } \ + } while (0) + +// Macro and throw on a non-successful UCC return value and free its request. +#define TORCH_UCC_CHECK_REQUEST(_request, _cmd, _error_msg) \ + do { \ + ucc_status_t result = _cmd; \ + if (result != UCC_OK) { \ + std::string err; \ + TORCH_UCC_GET_ERROR_MSG(err, _error_msg, result); \ + if (_request != nullptr) { \ + ucc_collective_finalize(_request); \ + } \ + TORCH_CHECK(false, err); \ + } \ + } while (0) + +// Macros to print logs with unified format +#define TORCH_UCC_LOG_ERROR(_phase, _msg) \ + LOG(ERROR) << logger->getLogPrefix(_phase) << "[ERROR] " << _msg; +#define TORCH_UCC_LOG_INFO(_phase, _msg) \ + LOG(INFO) << logger->getLogPrefix(_phase) << "[INFO] " << _msg; +#define TORCH_UCC_LOG_DEBUG(_phase, _msg) \ + VLOG(1) << logger->getLogPrefix(_phase) << "[DEBUG] " << _msg; + +enum torch_ucc_phase_t { + TORCH_UCC_UNKNOWN = -1, + TORCH_UCC_INIT, + TORCH_UCC_HEALTH_CHECK, + TORCH_UCC_READY, + TORCH_UCC_COLL_POST, + TORCH_UCC_COLL_PROGRESS, + TORCH_UCC_FINALIZE, +}; + +const std::map ucc_phase_map = { + {TORCH_UCC_UNKNOWN, "UNKNOWN"}, + {TORCH_UCC_INIT, "INIT"}, + {TORCH_UCC_HEALTH_CHECK, "HEALTH_CHECK"}, + {TORCH_UCC_READY, "READY"}, + {TORCH_UCC_COLL_POST, "COLL_POST"}, + {TORCH_UCC_COLL_PROGRESS, "COLL_PROGRESS"}, + {TORCH_UCC_FINALIZE, "FINALIZE"}, +}; + +class CommTraceLogger; + +class TORCH_API ProcessGroupUCCLogger : public torch::CustomClassHolder { + public: + ProcessGroupUCCLogger(); + ProcessGroupUCCLogger(std::string log_prefix, torch_ucc_phase_t phase); + + std::string getLogPrefix(torch_ucc_phase_t phase = TORCH_UCC_UNKNOWN); + void setLogPrefix(std::string log_prefix); + inline void setPhase(torch_ucc_phase_t phase) { + local_phase = phase; + } + + void initCommsTracer(); + void flushComms(int rank, int world_size); + std::shared_ptr trace_generator = nullptr; + + protected: + std::string log_prefix; + torch_ucc_phase_t local_phase = TORCH_UCC_UNKNOWN; + bool initialized_CommTraceLogger = false; +}; + +struct torch_ucc_oob_coll_info_t { + c10::intrusive_ptr store; + uint32_t comm_id; + int rank; + int size; + void* rbuf; + size_t msglen; + std::string getKey(std::string key) { + return std::to_string(comm_id) + key; + } +}; + +class CommBase { + public: + CommBase(const c10::intrusive_ptr& logger_) + : logger(logger_) {} + virtual void progress() = 0; + virtual void free_request(ucc_coll_req_h request) = 0; + virtual ~CommBase() {} + c10::intrusive_ptr logger; +}; +class CommUCC : public CommBase { + public: + ucc_lib_h lib{nullptr}; + ucc_context_h context{nullptr}; + + public: + void progress() override; + CommUCC( + std::shared_ptr oob, + const c10::intrusive_ptr& logger); + void free_request(ucc_coll_req_h request) override; + ~CommUCC(); +}; + +ucc_status_t oob_allgather( + void* sbuf, + void* rbuf, + size_t msglen, + void* coll_info, + void** req); + +ucc_status_t oob_allgather_test(void* req); + +ucc_status_t oob_allgather_free(void* req); + +// trim: remove spaces before and after the string view +// implementation borrowed from https://stackoverflow.com/a/17976541 +inline std::string_view trim(std::string_view s) { + auto wsfront = std::find_if_not( + s.begin(), s.end(), [](int c) { return std::isspace(c); }); + auto wsback = std::find_if_not(s.rbegin(), s.rend(), [](int c) { + return std::isspace(c); + }).base(); + return ( + wsback <= wsfront ? "" : s.substr(wsfront - s.begin(), wsback - wsfront)); +} + +inline std::string tolower(std::string_view s) { + std::string result; + result.reserve(s.size()); + for (auto c : s) { + result.push_back(std::tolower(c)); + } + return result; +} + +inline std::vector parse_list(std::string list) { + std::vector result; + list = tolower(trim(list)); + while (!list.empty()) { + const auto end_pos = list.find_first_of(','); + const auto token = trim(list.substr(0, end_pos)); + result.push_back(std::string(token)); + list = (end_pos != std::string_view::npos) ? list.substr(end_pos + 1) : ""; + } + return result; +} + +} // namespace c10d + +#endif // USE_C10D_UCC + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UnixSockUtils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UnixSockUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4293911d1faea1f3595eea7af19f7094afcfac60 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UnixSockUtils.hpp @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d::tcputil { + +#define CONNECT_SOCKET_OFFSET 2 + +inline int poll(struct pollfd* fds, unsigned long nfds, int timeout) { + return ::poll(fds, nfds, timeout); +} + +inline void addPollfd( + std::vector& fds, + int socket, + short events) { + fds.push_back({.fd = socket, .events = events}); +} + +inline struct ::pollfd getPollfd(int socket, short events) { + struct ::pollfd res = {.fd = socket, .events = events}; + return res; +} + +} // namespace c10d::tcputil + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Utils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fedb64349bbbb088ea87d493c37c0e8efaef2e65 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Utils.hpp @@ -0,0 +1,750 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +typedef SSIZE_T ssize_t; +#pragma comment(lib, "Ws2_32.lib") +#else +#include +#include +#include +#include +#include +#endif + +#include + +#include +#include +#include +#include +#include + +namespace c10d { + +TORCH_API size_t getTensorsNumel(const std::vector& tensors); + +// Retrieve tensor shapes from a given tensor. +TORCH_API std::vector getTensorShapes( + const std::vector& tensors); + +// Use -2 to represent unset state of env vars +#define C10D_ENV_NOT_SET -2 + +#define WARN_ENV_VAR_ONCE(deprecated_env, new_env) \ + TORCH_WARN_ONCE( \ + "Environment variable " + deprecated_env + " is deprecated; use " + \ + new_env + " instead"); + +// Turns at::IntArrayRef into "(1, 2, 3, 4)". +inline std::string toString(at::IntArrayRef l) { + std::stringstream ss; + ss << '('; + for (const auto i : c10::irange(l.size())) { + if (i > 0) { + ss << ", "; + } + ss << l[i]; + } + ss << ')'; + return ss.str(); +} + +inline std::string toString(const c10::Layout& layout) { + std::stringstream ss; + ss << layout; + return ss.str(); +} + +inline void assertSameType( + const at::DeprecatedTypeProperties& type, + const std::vector& tensors) { + for (const auto i : c10::irange(tensors.size())) { + if (!tensors[i].options().type_equal(type.options())) { + const std::string expected = type.toString(); + const std::string actual = tensors[i].toString(); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "mixed types (" + expected + " and " + actual + ")"); + } + } +} + +inline std::vector split( + char separator, + const std::string& string) { + std::vector pieces; + std::stringstream ss(string); + std::string item; + while (std::getline(ss, item, separator)) { + pieces.push_back(std::move(item)); + } + return pieces; +} + +inline std::string getCvarString( + const std::vector& env, + const char* def) { + std::string ret(def); + + if (env.empty()) { + TORCH_CHECK(false, "No environment variables passed"); + return ret; + } + + /* parse environment variable in reverse order, so the early + * versions of a variable get higher priority than the latter + * versions of the same variable */ + for (ssize_t i = static_cast(env.size()) - 1; i >= 0; i--) { + auto val = c10::utils::get_env(env[i].c_str()); + if (!val.has_value()) { + continue; + } else if (i) { + WARN_ENV_VAR_ONCE(env[i], env[0]); + } + + ret = val.value(); + } + + return ret; +} + +inline int getCvarInt(const std::vector& env, int def) { + int ret = def; + + if (env.empty()) { + TORCH_CHECK(false, "No environment variables passed"); + return ret; + } + + /* parse environment variable in reverse order, so the early + * versions of a variable get higher priority than the latter + * versions of the same variable */ + for (ssize_t i = static_cast(env.size()) - 1; i >= 0; i--) { + const auto val = c10::utils::get_env(env[i].c_str()); + if (!val.has_value()) { + continue; + } else if (i) { + WARN_ENV_VAR_ONCE(env[i], env[0]); + } + + try { + ret = std::stoi(val.value()); + } catch (std::exception&) { + TORCH_CHECK(false, "Invalid value for environment variable: " + env[i]); + } + } + + return ret; +} + +inline bool getCvarBool(const std::vector& env, bool def) { + bool ret = def; + + if (env.empty()) { + TORCH_CHECK(false, "No environment variables passed"); + return ret; + } + + /* parse environment variable in reverse order, so the early + * versions of a variable get higher priority than the latter + * versions of the same variable */ + for (ssize_t i = static_cast(env.size()) - 1; i >= 0; i--) { + auto val = c10::utils::get_env(env[i].c_str()); + if (!val.has_value()) { + continue; + } else if (i) { + WARN_ENV_VAR_ONCE(env[i], env[0]); + } + + for (auto& x : val.value()) { + // NOLINTNEXTLINE(*-narrowing-conversions) + x = std::tolower(x); + } + + if (val == "y" || val == "yes" || val == "1" || val == "t" || + val == "true") { + ret = true; + } else if ( + val == "n" || val == "no" || val == "0" || val == "f" || + val == "false") { + ret = false; + } else { + TORCH_CHECK(false, "Invalid value for environment variable: " + env[i]); + return ret; + } + } + + return ret; +} + +inline void assertSameSizes( + const at::IntArrayRef& sizes, + const std::vector& tensors) { + for (const auto i : c10::irange(tensors.size())) { + if (!tensors[i].sizes().equals(sizes)) { + const auto expected = toString(sizes); + const auto actual = toString(tensors[i].sizes()); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "mixed sizes (" + expected + " and " + actual + ")"); + } + } +} + +inline void assertSameSizeAndType(const std::vector& tensors) { + // Ensure we have at least one tensor + if (tensors.empty()) { + throw std::invalid_argument("argument is empty"); + } + + // Ensure all tensors have identical type and shape + auto options = tensors[0].options(); + auto sizes = tensors[0].sizes(); + for (const auto i : c10::irange(1, tensors.size())) { + if (!tensors[i].options().type_equal(options)) { + const auto expected = toString(options); + const auto actual = toString(tensors[i].options()); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "argument contains mixed types (" + expected + " and " + actual + + ")"); + } + if (!tensors[i].sizes().equals(sizes)) { + const auto expected = toString(sizes); + const auto actual = toString(tensors[i].sizes()); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "argument contains mixed types (" + expected + " and " + actual + + ")"); + } + } +} + +inline void assertTypeMatch( + const std::function& fn, + const at::DeprecatedTypeProperties& type, + const at::ArrayRef tensors, + size_t index) { + if (!tensors[index].options().type_equal(type.options())) { + fn("invalid tensor type at index " + std::to_string(index) + " (expected " + + type.toString() + ", got " + tensors[index].toString() + ")"); + } +} + +inline void assertTypeMatch( + const std::function& fn, + const at::TensorOptions& options, + const at::ArrayRef tensors, + size_t index) { + if (!tensors[index].options().type_equal(options)) { + fn("invalid tensor type at index " + std::to_string(index) + " (expected " + + toString(options) + ", got " + toString(tensors[index].options()) + ")"); + } +} + +inline void assertSizesMatch( + const std::function& fn, + const at::IntArrayRef& sizes, + const at::ArrayRef tensors, + size_t index) { + if (tensors[index].sizes() != sizes) { + fn("invalid tensor size at index " + std::to_string(index) + " (expected " + + toString(sizes) + ", got " + toString(tensors[index].sizes()) + ")"); + } +} + +inline void assertLayoutMatch( + const std::function& fn, + const c10::Layout& expected, + const at::ArrayRef tensors, + size_t index) { + const auto& actual = tensors[index].layout(); + if (actual != expected) { + fn("invalid tensor layout at index " + std::to_string(index) + + " (expected " + toString(expected) + ", got " + toString(actual) + ")"); + } +} + +inline void assertLayoutMatch( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& layout = tensors[0].layout(); + for (const auto i : c10::irange(1, tensors.size())) { + assertLayoutMatch(fn, layout, tensors, i); + } +} + +inline void assertNonEmpty( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.empty()) { + fn("requires non-empty tensor list"); + } +} + +inline void assertSingleElement( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() != 1) { + fn("requires a single-element tensor list"); + } +} + +inline void assertSingleElementInput( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() != 1) { + fn("requires a single-element input tensor list"); + } +} + +inline void assertSingleElementOutput( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() != 1) { + fn("requires a single-element output tensor list"); + } +} + +inline void assertRootRank( + const std::function& fn, + int64_t rank, + int64_t size) { + if (rank < 0 || rank >= size) { + fn("invalid root rank: " + std::to_string(rank)); + } +} + +inline void assertRootTensor( + const std::function& fn, + int64_t rank, + int64_t size) { + if (rank < 0 || rank >= size) { + fn("invalid root tensor: " + std::to_string(rank)); + } +} + +inline void assertDense( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& layout = tensors[0].layout(); + if (layout != at::kStrided) { + fn("only supports dense tensors"); + } +} + +inline void assertCPU( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& device = tensors[0].device(); + if (device.type() != at::kCPU) { + fn("only supports CPU tensors"); + } +} + +inline void assertSameDevice( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() < 2) { + return; + } + const auto& device = tensors[0].device(); + for (const auto i : c10::irange(1, tensors.size())) { + if (tensors[i].device() != device) { + fn("tensors should be on the same device"); + } + } +} + +inline void assertTypeAndSizesMatch( + const std::function& fn, + const at::ArrayRef tensors, + const at::DeprecatedTypeProperties& type, + const at::IntArrayRef& sizes) { + for (const auto i : c10::irange(tensors.size())) { + assertTypeMatch(fn, type, tensors, i); + assertSizesMatch(fn, sizes, tensors, i); + } +} + +inline void assertTypeAndSizesMatch( + const std::function& fn, + const at::ArrayRef tensors, + const at::TensorOptions& options, + const at::IntArrayRef& sizes) { + for (const auto i : c10::irange(tensors.size())) { + assertTypeMatch(fn, options, tensors, i); + assertSizesMatch(fn, sizes, tensors, i); + } +} + +inline void assertTypeAndSizesMatch( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& options = tensors[0].options(); + const auto sizes = tensors[0].sizes(); + assertTypeAndSizesMatch(fn, tensors.slice(1), options, sizes); +} + +// Copied from ATen/core/functional.h. +template +inline auto fmap(T& inputs, const F& fn) + -> std::vector { + std::vector r; + r.reserve(inputs.size()); + for (auto& input : inputs) { + r.push_back(fn(input)); + } + return r; +} + +// Copied from torch/csrc/utils/tensor_flatten.h. +inline at::Tensor flattenDenseTensors(at::TensorList tensors) { + static const auto flatten = [](const at::Tensor& t) { + return t.contiguous().view({-1}); + }; + if (tensors.size() == 1) { + return flatten(tensors[0]); + } + return at::cat(::c10d::fmap(tensors, flatten)); +} + +inline at::Tensor newLikeFlat( + std::vector>& tensors, + size_t deviceIdx) { + if (tensors.empty() || tensors[0].empty()) { + TORCH_CHECK(false, "Received an empty list"); + } + if (deviceIdx >= tensors.size()) { + TORCH_CHECK(false, "Invalid device index"); + } + auto& t = tensors[deviceIdx][0]; + auto device = t.device(); + for (const auto i : c10::irange(1, tensors[deviceIdx].size())) { + if (tensors[deviceIdx][i].device() != device) { + TORCH_CHECK(false, "Expecting all tensors on the same device"); + } + } + at::DeviceGuard gpuGuard(device); + std::vector sizes{static_cast(tensors[deviceIdx].size())}; + std::vector strides{t.numel()}; + sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); + strides.insert(strides.end(), t.strides().begin(), t.strides().end()); + return at::empty_strided( + sizes, strides, t.options().memory_format(std::nullopt)); +} + +inline at::Tensor newLikeFlat(std::vector& tensors) { + if (tensors.empty()) { + TORCH_CHECK(false, "Received an empty list"); + } + auto& t = tensors[0]; + at::DeviceGuard gpuGuard(t.device()); + std::vector sizes{static_cast(tensors.size())}; + sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); + return at::empty(sizes, t.options()); +} + +inline std::vector> getSizes( + const std::vector& tensors) { + std::vector> sizes(tensors.size()); + for (const auto i : c10::irange(tensors.size())) { + sizes[i] = tensors[i].sizes().vec(); + } + return sizes; +} + +inline std::vector getDevices(const std::vector& tensors) { + std::vector devices(tensors.size(), -1); + if (tensors[0].device().is_cuda()) { + for (const auto i : c10::irange(tensors.size())) { + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + devices[i] = tensors[i].storage().device().index(); + } + } + return devices; +} + +template +inline T* getDataPointer(const at::Tensor& tensor) { + // This method is only used in ProcessGroupGloo for now. Call sites must make + // sure that the input tensor is contiguous. It is OK if the tensor does not + // start from the beginning of the storage. For example, it could come from + // chunk(..., dim=0)[1]. Hence, we need to use data_ptr() instead of + // tensor.storage().data() + // NB: not using tensor.data() because tensor is not aware of gloo::TYPE + return static_cast(tensor.data_ptr()); +} + +template +std::vector getDataPointers(const std::vector& tensors) { + std::vector ptrs(tensors.size()); + for (const auto i : c10::irange(tensors.size())) { + ptrs[i] = getDataPointer(tensors[i]); + } + return ptrs; +} + +// For alltoall split size sanity check +inline void checkSplitSizes( + const std::vector& split_sizes, + const at::Tensor& tensor, + int group_size) { + if (split_sizes.empty()) { + TORCH_CHECK( + tensor.size(0) % group_size == 0, + "Tensor's dim 0 does not divide equally across group size"); + } else { + TORCH_CHECK( + split_sizes.size() == static_cast(group_size), + "Number of tensor splits not equal to group size"); + const auto sum = c10::sum_integers(split_sizes); + TORCH_CHECK( + sum == tensor.size(0), "Split sizes doesn't match total dim 0 size"); + } +} + +// Compute alltoall lengths and offsets, handling multi-dimension tensors +template +size_t computeLengthsAndOffsets( + const std::vector& split_sizes, + const at::Tensor& tensor, + std::vector* lengths, + std::vector* offsets) { + size_t group_size = lengths->size(); + bool equal_splits = false; + size_t dim0_size = tensor.size(0); + size_t row_size = (dim0_size ? tensor.numel() / dim0_size : 1); + size_t split_size = 0; + size_t offset = 0; + + if (split_sizes.empty()) { + equal_splits = true; + split_size = tensor.size(0) / group_size; + } + for (const auto i : c10::irange(group_size)) { + size_t length = row_size * (equal_splits ? split_size : split_sizes[i]); + (*lengths)[i] = length; + (*offsets)[i] = offset; + // TODO: see if we should add overflow protection for offset + offset += length; + } + return offset; +} + +template +size_t computeLengthsAndOffsets( + const std::vector& tensors, + std::vector* lengths, + std::vector* offsets) { + size_t group_size = lengths->size(); + size_t offset = 0; + for (const auto i : c10::irange(group_size)) { + size_t length = tensors[i].numel(); + (*lengths)[i] = length; + (*offsets)[i] = offset; + offset += length; + } + return offset; +} + +// Get the start and stride of the global rank from a list of global ranks +// If the global ranks do not follow the consecutive rule, the stride will be -1 +void TORCH_API getGlobalRankStartAndStride( + const std::vector& globalRanksInGroup, + int& globalRankStart, + int& globalRankStride); + +using RankType = uint32_t; +using SizeType = uint64_t; + +// `errno` is only meaningful when it fails. E.g., a successful `fork()` sets +// `errno` to `EINVAL` in child process on some macos +// (https://stackoverflow.com/a/20295079), and thus `errno` should really only +// be inspected if an error occurred. +// +// `success_cond` is an expression used to check if an error has happened. So +// for `fork()`, we can use `SYSCHECK(pid = fork(), pid != -1)`. The function +// output is stored in variable `__output` and may be used in `success_cond`. +#ifdef _WIN32 +#define SYSCHECK(expr, success_cond) \ + while (true) { \ + auto __output = (expr); \ + auto errno_local = WSAGetLastError(); \ + (void)__output; \ + if (!(success_cond)) { \ + if (errno == EINTR) { \ + continue; \ + } else if ( \ + errno_local == WSAETIMEDOUT || errno_local == WSAEWOULDBLOCK) { \ + C10_THROW_ERROR(DistNetworkError, "Socket Timeout"); \ + } else { \ + C10_THROW_ERROR(DistNetworkError, c10::utils::str_error(errno_local)); \ + } \ + } else { \ + break; \ + } \ + } +#else +#define SYSCHECK(expr, success_cond) \ + while (true) { \ + auto __output = (expr); \ + (void)__output; \ + if (!(success_cond)) { \ + if (errno == EINTR) { \ + continue; \ + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { \ + C10_THROW_ERROR(DistNetworkError, "Socket Timeout"); \ + } else { \ + C10_THROW_ERROR(DistNetworkError, c10::utils::str_error(errno)); \ + } \ + } else { \ + break; \ + } \ + } +#endif + +// Most functions indicate error by returning `-1`. This is a helper macro for +// this common case with `SYSCHECK`. +// Since SOCKET_ERROR = -1 in MSVC, so also leverage SYSCHECK_ERR_RETURN_NEG1 +#define SYSCHECK_ERR_RETURN_NEG1(expr) SYSCHECK(expr, __output != -1) + +namespace tcputil { + +// Send and receive +template +void sendBytes( + int socket, + const T* buffer, + size_t length, + bool moreData = false) { + size_t bytesToSend = sizeof(T) * length; + if (bytesToSend == 0) { + return; + } + + auto currentBytes = reinterpret_cast(buffer); + + int flags = 0; + +#ifdef MSG_MORE + if (moreData) { // there is more data to send + flags |= MSG_MORE; + } +#endif + +// Ignore SIGPIPE as the send() return value is always checked for error +#ifdef MSG_NOSIGNAL + flags |= MSG_NOSIGNAL; +#endif + + while (bytesToSend > 0) { + ssize_t bytesSent = 0; + SYSCHECK_ERR_RETURN_NEG1( + bytesSent = ::send(socket, currentBytes, bytesToSend, flags)) + if (bytesSent == 0) { + C10_THROW_ERROR( + DistNetworkError, + "Failed to send, sent 0 bytes. " + "Connection was likely closed. " + "Did the remote server shutdown or crash?"); + } + + bytesToSend -= bytesSent; + currentBytes += bytesSent; + } +} + +template +void recvBytes(int socket, T* buffer, size_t length) { + size_t bytesToReceive = sizeof(T) * length; + if (bytesToReceive == 0) { + return; + } + + auto currentBytes = reinterpret_cast(buffer); + + while (bytesToReceive > 0) { + ssize_t bytesReceived = 0; + SYSCHECK_ERR_RETURN_NEG1( + bytesReceived = recv(socket, currentBytes, bytesToReceive, 0)) + if (bytesReceived == 0) { + C10_THROW_ERROR( + DistNetworkError, + "Failed to recv, got 0 bytes. " + "Connection was likely closed. " + "Did the remote server shutdown or crash?"); + } + + bytesToReceive -= bytesReceived; + currentBytes += bytesReceived; + } +} + +// send a vector's length and data +template +void sendVector(int socket, const std::vector& vec, bool moreData = false) { + SizeType size = vec.size(); + sendBytes(socket, &size, 1, true); + sendBytes(socket, vec.data(), size, moreData); +} + +// receive a vector as sent in sendVector +template +std::vector recvVector(int socket) { + SizeType valueSize = 0; + recvBytes(socket, &valueSize, 1); + std::vector value(valueSize); + recvBytes(socket, value.data(), value.size()); + return value; +} + +// this is only for convenience when sending rvalues +template +void sendValue(int socket, const T& value, bool moreData = false) { + sendBytes(socket, &value, 1, moreData); +} + +template +T recvValue(int socket) { + T value; + recvBytes(socket, &value, 1); + return value; +} + +// send a string's length and data +inline void sendString( + int socket, + const std::string& str, + bool moreData = false) { + SizeType size = str.size(); + sendBytes(socket, &size, 1, true); + sendBytes(socket, str.data(), size, moreData); +} + +// receive a string as sent in sendString +inline std::string recvString(int socket) { + SizeType valueSize = 0; + recvBytes(socket, &valueSize, 1); + std::vector value(valueSize); + recvBytes(socket, value.data(), value.size()); + return std::string(value.data(), value.size()); +} + +} // namespace tcputil +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/WinSockUtils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/WinSockUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4e94641b9b8ed2ce362b092398a6af0243aa4a81 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/WinSockUtils.hpp @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d::tcputil { + +#define CONNECT_SOCKET_OFFSET 1 + +inline int poll(struct pollfd* fdArray, unsigned long fds, int timeout) { + return WSAPoll(fdArray, fds, timeout); +} + +inline void addPollfd( + std::vector& fds, + int socket, + short events) { + fds.push_back({(SOCKET)socket, events}); +} + +inline struct ::pollfd getPollfd(int socket, short events) { + struct ::pollfd res = {(SOCKET)socket, events}; + return res; +} + +} // namespace c10d::tcputil + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Work.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Work.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a109527dd40efdc364cc899e96d482dea336b262 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Work.hpp @@ -0,0 +1,190 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +constexpr auto kNoTimeout = std::chrono::milliseconds(0); + +namespace c10d { + +constexpr const char* const kSeqNumStoreKey = "SEQ_NUM_STORE_KEY"; + +enum class OpType : std::uint8_t { + BROADCAST = 0, + ALLREDUCE = 1, + ALLREDUCE_COALESCED = 2, + REDUCE = 3, + ALLGATHER = 4, + _ALLGATHER_BASE = 5, + ALLGATHER_COALESCED = 6, + GATHER = 7, + SCATTER = 8, + REDUCE_SCATTER = 9, + ALLTOALL_BASE = 10, + ALLTOALL = 11, + SEND = 12, + RECV = 13, + RECVANYSOURCE = 14, + BARRIER = 15, + _REDUCE_SCATTER_BASE = 16, + COALESCED = 17, + _ALLREDUCE_SPARSE = 18, + UNKNOWN = 100, +}; + +// TODO: support different types of failures/errors +enum class WorkResult : std::uint8_t { + SUCCESS = 0, + TIMEOUT = 1, + COMM_ERROR = 2, + UNKNOWN = 100, +}; + +// Converts OpType to human readable string. +TORCH_API std::string opTypeToString(OpType opType); + +// Whether or not an OP is an p2p op (SEND, RECV, RECVANYSOURCE) +TORCH_API bool isP2POp(OpType opType, bool batchP2P = false); + +// Please do not use Work API, it is going away, to be +// replaced by ivalue::Future. +// Python binding for this class might change, please do not assume +// this will be bound using pybind. +class TORCH_API Work : public torch::CustomClassHolder { + public: + Work( + int rank = -1, + OpType opType = OpType::UNKNOWN, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + ~Work() override; + + // Checks if request has completed. Non-blocking operation. + virtual bool isCompleted(); + + // Returns if the work completed successfully. + // If false, the exception function can be called to get details. + virtual bool isSuccess() const; + + // Returns exception if isSuccess() returned false. + virtual std::exception_ptr exception() const; + + // Returns source rank if this objects represents a recv-from-any. + virtual int sourceRank() const; + + // Returns result tensors, if applicable. + // If work is not supposed to have result, we return empty list. + virtual std::vector result(); + + // Ensures that operations on the output tensors that are invoked + // after this function returns are correctly sequenced after the + // asynchronous completion of this work. + // + // For CUDA tensors, it inserts stream synchronization such that + // the streams of the caller wait for completion of the + // asynchronous operations on the destination tensors. + // + // For CPU tensors, it is currently a nop. + // + // This function should only be used if the caller polls for + // completion through the `isCompleted` function, it has returned + // true, and the `isSuccess` function also has returned true. + // + virtual void synchronize(); + + // Waits until request completes. Blocking operation. + // Throws if the work completed with an exception. + // Returns false if the work is aborted. + // Otherwise, it always returns true, indicating the work is completed. + // + // Functionally equivalent to: + // + // while (!isCompleted()) { /* nop */ } + // auto success = isSuccess(); + // if (!success) { std::rethrow_exception(exception()); } + // return success; + // + virtual bool wait(std::chrono::milliseconds timeout = kNoTimeout); + + // Blocks the current stream until the work is completed. + // This is equivalent to synchronize for CUDA tensors but works for both CPU + // tensors and CUDA tensors by using a spinlock CUDA kernel. + // This will immediately return. + // If no stream is active it will throw an error. + virtual void blockCurrentStream(); + + virtual void abort(); + + // Returns a Future object that will be associated with the completion of + // work. Only NCCL backend is currently supported. + virtual c10::intrusive_ptr getFuture(); + + // Get a Future object that would be marked as either success or failure + // This API can be used by the user to track the completion of the work + // and handle the exception if any. + virtual c10::intrusive_ptr getFutureResult(); + + virtual float getDuration() const; + + virtual uint64_t getSequencenumber() const; + + OpType retrieveOpType() const; + + static c10::intrusive_ptr create_from_future( + const c10::intrusive_ptr& /*future*/); + + protected: + // Completes the work object and optionally sets the exception in a + // thread-safe manner. Notifies all waiting condition variables as well. + void finish(std::exception_ptr exception = nullptr); + + // Similar to finish, but throws an exception if one is already set or + // provided by the user. + void finishAndThrow(std::exception_ptr exception); + + mutable std::mutex mutex_; + std::condition_variable cv_; + bool completed_ = false; + std::exception_ptr exception_; + + // Current rank of the node. + const int rank_; + + // Operation type that this work object refers to. + OpType opType_; + + // When profiling, the callback to record end of operation event. This + // callback needs to be called when collective operation is complete. + std::function recordFunctionEndCallback_; +}; + +struct TORCH_API WorkInfo { + WorkInfo( + const OpType& opType, + const uint64_t seq, + const std::chrono::time_point& timeStarted, + const std::chrono::time_point& timeFinished, + const std::chrono::duration& activeDuration) + : opType(opType), + seq(seq), + timeStarted(timeStarted), + timeFinished(timeFinished), + activeDuration(activeDuration) {} + + OpType opType; + uint64_t seq; + std::chrono::time_point timeStarted; + std::chrono::time_point timeFinished; + std::chrono::duration activeDuration; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/c10d.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/c10d.h new file mode 100644 index 0000000000000000000000000000000000000000..45daf59b55ab849b2e72bdf275a6c09315413ef0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/c10d.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::c10d { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/comm.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/comm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e5099eb2980e5d3f38ab7aa723f5f92880b73d91 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/comm.hpp @@ -0,0 +1,147 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d { + +// Broadcast many tensors to all processes in the process group. +TORCH_API void broadcast_coalesced( + const c10::intrusive_ptr& process_group, + at::TensorList tensors, + size_t buffer_size, + int rank = 0); + +// This class passes bucket contents tensor to DDP communication hook. +class TORCH_API GradBucket { + public: + explicit GradBucket( + size_t index, + size_t bucket_count, + at::Tensor tensor, + std::vector offsets, + std::vector lengths, + std::vector sizes_vec, + std::vector parameters, + std::optional sparse_grad_indices) + : index_(index), + bucket_count_(bucket_count), + buffer_(std::move(tensor)), + offsets_(std::move(offsets)), + lengths_(std::move(lengths)), + sizes_vec_(std::move(sizes_vec)), + parameters_(std::move(parameters)), + sparse_grad_indices_(std::move(sparse_grad_indices)) {} + + // Returns the index of the bucket, which is unique across all the buckets. + size_t getIndex() const { + return index_; + } + + const at::Tensor& getBuffer() const { + return buffer_; + } + + // Returns a mutable buffer compared with the above method. + at::Tensor& getBufferRef() { + return buffer_; + } + + // Overwrites the buffer at a specific index. + void setBuffer(at::Tensor& buffer) { + buffer_ = buffer; + } + + // Each tensor in the list that getGradients corresponds to a + // parameter. + std::vector getGradients() const; + + // Returns model parameters belonging to this bucket. They are returned in the + // same order as gradient tensors via getGradients(). For example, + // getParameters[i] will have its gradient stored in + // getGradients[i] + const std::vector getParameters() const { + return parameters_; + } + + // Returns whether this bucket is the last bucket to allreduce in an + // iteration. + bool isLast() const { + return index_ == bucket_count_ - 1; + } + + std::optional& getSparseGradIndices() { + return sparse_grad_indices_; + } + + private: + size_t index_; + size_t bucket_count_; + at::Tensor buffer_; + + // Per-variable info in buffer_. + std::vector offsets_; + std::vector lengths_; + std::vector sizes_vec_; + + // Model parameters for this bucket. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::vector parameters_; + + // Predefined sparse indices for this bucket (only used for sparse tensors). + // The gradients will be updated to have indices with these tensor values + std::optional sparse_grad_indices_; +}; + +// Base class of both `PythonCommHook` and `CppCommHook`. +// Requires implementing 1) `runHook` method that communicates gradients +// asynchronously, and 2) `parseHookResult` method that converts the hook +// result into a tensor. +class TORCH_API CommHookInterface { + public: + virtual ~CommHookInterface() = default; + + // Passes the input grad bucket to the registered communication hook. + // Once the tensor in the bucket are ready, kicks off the hook asynchronously + // and returns a future that holds the communication results. + virtual c10::intrusive_ptr runHook( + GradBucket& bucket) = 0; + + // Returns the resulting tensor once the communication hook result is + // ready. The resulting tensor will then be copied to the grads of + // individual parameters. + virtual at::Tensor parseHookResult(const c10::IValue& result) = 0; +}; + +namespace detail { +// This helper function is called both by CppCommHookInterface below and inside +// reducer. +TORCH_API at::Tensor parseCppCommHookResult(const c10::IValue& result); +} // namespace detail + +// This CppCommHook interface only requires implementing runHook method that +// potentially uses a state. +template +class CppCommHookInterface : public CommHookInterface { + public: + explicit CppCommHookInterface(T state) : state_(std::move(state)) {} + + ~CppCommHookInterface() override = default; + + at::Tensor parseHookResult(const c10::IValue& result) override { + return detail::parseCppCommHookResult(result); + } + + protected: + T state_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5beaa289331f4c3b81d3a279dddb8a3be6b51b38 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp @@ -0,0 +1,64 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace c10d { + +using namespace std::chrono_literals; + +class TORCH_API ControlCollectives : public torch::CustomClassHolder { + public: + virtual void barrier( + const std::string& key, + std::chrono::milliseconds timeout = 5min, + bool block = true) = 0; + + virtual void broadcastSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + virtual std::vector broadcastRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual void gatherSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + virtual std::vector> gatherRecv( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual std::vector scatterSend( + const std::string& key, + const std::vector>& data, + std::chrono::milliseconds timeout = 5min) = 0; + virtual std::vector scatterRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual std::vector> allGather( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual int64_t allSum( + const std::string& key, + int64_t data, + std::chrono::milliseconds timeout = 5min) = 0; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5782eb49f5755c1f2270428dd171260ad1d28a02 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace c10d { + +class TORCH_API StoreCollectives : public ControlCollectives { + public: + explicit StoreCollectives( + c10::intrusive_ptr store, + int rank, + int worldSize); + + void barrier( + const std::string& key, + std::chrono::milliseconds timeout = 5min, + bool block = true) override; + + void broadcastSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + std::vector broadcastRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) override; + + void gatherSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + std::vector> gatherRecv( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + + std::vector scatterSend( + const std::string& key, + const std::vector>& data, + std::chrono::milliseconds timeout = 5min) override; + std::vector scatterRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) override; + + std::vector> allGather( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + + int64_t allSum( + const std::string& key, + int64_t data, + std::chrono::milliseconds timeout = 5min) override; + + private: + void enforceUnique(const std::string& key); + + private: + c10::intrusive_ptr store_; + int rank_; + int worldSize_; + + c10::FastSet seenKeys_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bf79294f23d5515edbe5c98dc20b6b1ae16fdb5a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace c10d::control_plane { + +// Request represents a request to the handler. This conceptually maps to an +// HTTP request but could be called via other transports. +class TORCH_API Request { + public: + virtual ~Request() = default; + + virtual const std::string& body() const = 0; + + virtual const std::multimap& params() const = 0; + + std::string getParam(const std::string& key) const { + auto it = params().find(key); + if (it != params().end()) { + return it->second; + } + return ""; + } +}; + +// Response represents a response to the handler. This conceptually maps to an +// HTTP response but could be called via other transports. +class TORCH_API Response { + public: + virtual ~Response() = default; + + // Set the response body to the provided string. + // TODO: add support for chunked responses + virtual void setContent( + std::string&& content, + const std::string& content_type) = 0; + + // Set the response status code. + // These should match standard HTTP status codes. + virtual void setStatus(int status) = 0; +}; + +using HandlerFunc = std::function; + +// Registers a handler. The name needs to be unique and can be called by using +// getHandler directly or via WorkerServer for remote requests. +// These handlers are called from a background C++ thread concurrently with the +// main thread. These handlers need to be thread safe and not cause issues +// during Python training. +TORCH_API void registerHandler(const std::string& name, HandlerFunc f); + +// Fetches a handler by name. +TORCH_API HandlerFunc getHandler(const std::string& name); + +TORCH_API std::vector getHandlerNames(); + +// Registers a handler statically. +// See registerHandler for more details. +class TORCH_API RegisterHandler { + public: + RegisterHandler(const std::string& name, HandlerFunc f) { + registerHandler(name, std::move(f)); + } + + // disable move, copy + RegisterHandler(const RegisterHandler&) = delete; + RegisterHandler(RegisterHandler&&) = delete; + RegisterHandler& operator=(const RegisterHandler&) = delete; + RegisterHandler& operator=(RegisterHandler&&) = delete; +}; + +} // namespace c10d::control_plane + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WaitCounterHandler.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WaitCounterHandler.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b6e73ec065fe5b1b094f4fabd97ad887a9b94b68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WaitCounterHandler.hpp @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { +namespace control_plane { + +// Returns all wait counter values as a JSON string +std::string getWaitCounterValuesJson(); + +// Ensures the wait counter backend is registered +void ensureWaitCounterBackendRegistered(); + +} // namespace control_plane +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9ffaa77726734d694015d8900ae759b762deff6b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdeprecated-literal-operator") +#include +C10_DIAGNOSTIC_POP() + +namespace c10d::control_plane { + +class TORCH_API WorkerServer : public c10::intrusive_ptr_target { + public: + WorkerServer(const std::string& hostOrFile, int port = -1); + ~WorkerServer() override; + + void shutdown(); + + int port() { + return port_; + } + + private: + httplib::Server server_; + std::thread serverThread_; + int port_; +}; + +} // namespace c10d::control_plane + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/CUDAEventCache.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/CUDAEventCache.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e289e3d0aff7561ab197ce8c3e18ccf33d2c9ff9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/CUDAEventCache.hpp @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace c10d { + +class TORCH_API CUDAEventCache + : public std::enable_shared_from_this { + public: + CUDAEventCache(); + std::shared_ptr create(bool timing); + static std::shared_ptr get(at::DeviceIndex device); + + private: + std::mutex cacheMutex_; + // NOTE: We intentionally store raw pointers so that + // we do not attempt to destroy the event objects on process exit, + // because cuda may be gone. + std::array, 2> + eventsArray_; // 0 for timing=false, 1 for timing=true +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/StreamBlock.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/StreamBlock.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8195a6faa33d4f86c771ff08b254022e43b3cacd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/StreamBlock.hpp @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace c10d::cuda { + +enum StreamBlockStatus : int32_t { + UNKNOWN = 0, + RUNNING = 1, + TIMED_OUT = 2, + ABORTED = 3, +}; + +/* +StreamBlock implements a baton that will block a the active CUDA stream +until aborted by the main process. +*/ +class TORCH_API StreamBlock { + public: + virtual ~StreamBlock() = default; + virtual void abort() = 0; + virtual StreamBlockStatus status() = 0; +}; + +std::unique_ptr block_stream(std::chrono::milliseconds timeout); + +// Declare a registry so we can call the CUDA StreamBlock API from CPU only code +// (i.e. ProcessGroup/Work objects in libtorch_cpu). +// The implementation lives defined in StreamBlock.cu. +TORCH_DECLARE_REGISTRY( + StreamBlockRegistry, + StreamBlock, + std::chrono::milliseconds); + +} // namespace c10d::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/utils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d8a2520f4fd00d62e506f65e8645c1b035fee2c7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/utils.hpp @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// This file contains utility functions common for CUDA, which can be used by +// ProcessGroupNCCL or SymmetricMemory. + +namespace c10d::cuda { + +bool deviceSupportsMulticast(int device_idx); + +} // namespace c10d::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/debug.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/debug.h new file mode 100644 index 0000000000000000000000000000000000000000..cf343d0eef5a8b1a06ab96d0fbf0a19aaa61f87f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/debug.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +namespace c10d { + +enum class DebugLevel { Off = 0, Info = 1, Detail = 2 }; + +TORCH_API void setDebugLevel(DebugLevel level); + +// Sets the debug level based on the value of the `TORCH_DISTRIBUTED_DEBUG` +// environment variable. +TORCH_API void setDebugLevelFromEnvironment(); + +TORCH_API DebugLevel debug_level() noexcept; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/default_comm_hooks.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/default_comm_hooks.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c56d6097a4d48dfe170ebab6b5e49d7f8504e83e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/default_comm_hooks.hpp @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10d { + +enum class BuiltinCommHookType : uint8_t { + ALLREDUCE = 1, + FP16_COMPRESS = 2, +}; + +class AllReduceCommHook + : public CppCommHookInterface> { + public: + explicit AllReduceCommHook(const c10::intrusive_ptr& state) + : CppCommHookInterface>(state) {} + + ~AllReduceCommHook() override = default; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; +}; + +class FP16CompressCommHook + : public CppCommHookInterface> { + public: + explicit FP16CompressCommHook(const c10::intrusive_ptr& state) + : CppCommHookInterface>(state) {} + + ~FP16CompressCommHook() override = default; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; +}; + +// Almost same as AllReduceCommHook, but without division inside the hook. +// This enables the optimization of fusing copy and division and saves one scan +// over all the input parameters, when no communication hook is provided by the +// user. Only used internally and not released as a public built-in +// communication hook. +class _AllReduceBySumCommHook + : public CppCommHookInterface> { + public: + explicit _AllReduceBySumCommHook( + const c10::intrusive_ptr& state) + : CppCommHookInterface>(state) {} + + ~_AllReduceBySumCommHook() override = default; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/error.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/error.h new file mode 100644 index 0000000000000000000000000000000000000000..e4e1a3e2d2be49904528720168ffc383b4332f9e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/error.h @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Facebook, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include +#include + +#include + +namespace fmt { + +template <> +struct formatter { + constexpr auto parse(format_parse_context& ctx) const { + return ctx.begin(); + } + + template + auto format(const std::error_category& cat, FormatContext& ctx) const { + if (std::strcmp(cat.name(), "generic") == 0) { + return fmt::format_to(ctx.out(), "errno"); + } else { + return fmt::format_to(ctx.out(), "{} error", cat.name()); + } + } +}; + +template <> +struct formatter { + constexpr auto parse(format_parse_context& ctx) const { + return ctx.begin(); + } + + template + auto format(const std::error_code& err, FormatContext& ctx) const { + return fmt::format_to( + ctx.out(), "({}: {} - {})", err.category(), err.value(), err.message()); + } +}; + +} // namespace fmt + +namespace c10d::detail { + +inline std::error_code lastError() noexcept { + return std::error_code{errno, std::generic_category()}; +} + +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/exception.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/exception.h new file mode 100644 index 0000000000000000000000000000000000000000..b5e5c995331893cfd6f1f5a5666deeb2dd316bd3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/exception.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Facebook, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include +#include + +// Utility macro similar to C10_THROW_ERROR, the major difference is that this +// macro handles exception types defined in the c10d namespace, whereas +// C10_THROW_ERROR requires an exception to be defined in the c10 namespace. +#define C10D_THROW_ERROR(err_type, ...) \ + throw ::c10d::err_type( \ + {__func__, __FILE__, static_cast(__LINE__)}, \ + c10::str(__VA_ARGS__)) + +#define C10D_CHECK_WITH(error_t, cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + C10D_THROW_ERROR( \ + error_t, TORCH_CHECK_MSG(cond, "", c10::str(__VA_ARGS__))); \ + } + +namespace c10d { + +using c10::DistNetworkError; +using c10::DistStoreError; + +class TORCH_API SocketError : public DistNetworkError { + using DistNetworkError::DistNetworkError; +}; + +class TORCH_API TimeoutError : public DistNetworkError { + using DistNetworkError::DistNetworkError; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logger.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logger.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9cb5cf16044d0e6fc6b6a8fdd8774a17c7cfcaf4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logger.hpp @@ -0,0 +1,176 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace c10d { + +// A struct to hold the latest status of the process group. +struct ProcessGroupStatus { + // the sequential number of the last collective enqueued into workMetaList_ + // This is useful for identifying a rank that has not join a collective + // initialized to be -1 to indicate no collective has been enqueued + int64_t lastEnqueuedSeq{-1}; + // the sequential number of the last collective started as the kernel + int64_t lastStartedSeq{-1}; + // the sequential number of the last collective completed marked by + // the watchdog thread + // initialized to be -1 to indicate no collective has been completed + int64_t lastCompletedSeq{-1}; + + // the name of the last collective enqueued into workMetaList_ + std::string lastEnqueuedWorkName; + // the name of the last collective started as the kernel + std::string lastStartedWorkName; + // the name of the last collective completed + std::string lastCompletedWorkName; + + // the sizes of the last work enqueued + size_t lastEnqueuedNumelIn; + size_t lastEnqueuedNumelOut; + // the sizes of the last work completed + size_t lastCompletedNumelIn; + size_t lastCompletedNumelOut; + // the sizes of the last work started + size_t lastStartedNumelIn; + size_t lastStartedNumelOut; +}; + +class TORCH_API Logger { + public: + explicit Logger(std::shared_ptr reducer); + // Set logging data that can be got during DistributedDataParallel + // construction time. + void set_construction_data_and_log( + const std::string& module_name, + const std::vector& device_ids, + int output_device, + bool broadcast_buffers, + bool has_sync_bn, + bool static_graph); + + void set_static_graph(); + + // An interface for users to get DDPLoggingData and log them + // in the applications. Explanation of logging fields are in + // "struct DDPLoggingData" of "torch/c10/util/Logging.h". + at::DDPLoggingData get_ddp_logging_data(); + + // Stream insertion operator for logging data to stream under + // TORCH_DISTRIBUTED_DEBUG. + friend std::ostream& operator<<(std::ostream& output, const Logger& logger); + + ~Logger() noexcept(false) { + // Log if DDP graph is static in Logger dtor instead of Reducer dtor since + // Logger is deleted before Reducer. + log_if_graph_static(reducer_->ddp_graph_static()); + } + + // Set environment variables. + void set_env_variables(); + // Set parameters stats. + void set_parameter_stats(); + // Get size of each bucket (Bytes). + std::vector get_bucket_sizes(); + // Get variable indices for each bucket. + std::vector> get_per_bucket_variable_indices(); + // Set comm. hook, if used + void set_comm_hook(const std::string& hook); + // Set running with uneven input detection (model.join() context manager) + void set_uneven_input_join(); + + // Reset performance stats at current iteration + void reset_performance_stats(); + + // Calculate avg stats using cpu timer and gpu timer + // that has been recorded in reducer. + void calculate_avg_time( + int64_t& avg_time, + int64_t& time_duration, + Timer& timer, + Timer::Event start_event, + Timer::Event end_event); + + // Set the absolute time of the event that has been recorded in reducer. + void set_event_time(int64_t& event_time, Timer& timer, Timer::Event event); + // Set stats that can be collected only during + // training loop. It is called at the beginning of forward call + // to record the run time stats of sampled iterations that previously ran. + // GPU performance stats are collected only for single process + // single device program and single device module right now. + // TODO to support single process multiple devices and multi device modules, + // events need to be created and recorded on multiple devices. + void set_runtime_stats_and_log(); + + // Called when DDP/reducer is failing with an error. The + // logging data structure will have two fields filled: "has_error" indicating + // that this iteration encountered an error and other fields are not valid, + // and "error", a string which contains the error message that DDP failed + // with. + template + void set_error_and_log(const std::string& ddp_error, const Args&... args) { + ddp_logging_data_->ints_map["has_error"] = 1; + auto err = c10::str(ddp_error, args...); + ddp_logging_data_->strs_map["error"] = err; + // Report the iteration we are erroring at so user knows how many examples + // successfully processed before this error was hit. + ddp_logging_data_->ints_map["iteration"] = reducer_->num_iterations_; + at::LogPyTorchDDPUsage(*ddp_logging_data_); + } + + // When running without static graph, called when reducer is destroyed to log + // if graph was actually static and is a candidate for static graph + // optimization. + void log_if_graph_static(bool is_static); + + private: + // ddp_logging_data_ is used to hold all the ddp related logging + // data fields. + std::unique_ptr ddp_logging_data_; + std::shared_ptr reducer_; + // track the number of iterations when runtime stats are collected so far. + long num_iterations_stats_recorded_ = 0; +}; + +// a generic logging data struct that holds different types of logging data. +// starting with key value pairs of strings and integers, +// It can be extended to more types as needed. +struct C10dLoggingData { + // logging fields that are string types. + std::map strings; + // logging fields that are int64_t types. + std::map integers; +}; + +class TORCH_API C10dLogger { + public: + C10dLogger(const C10dLogger&) = default; + C10dLogger(C10dLogger&&) = delete; + C10dLogger& operator=(const C10dLogger&) = default; + C10dLogger& operator=(C10dLogger&&) = delete; + virtual ~C10dLogger() = default; + virtual void log(const C10dLoggingData& data); + static C10dLogger* getLogger(); + static void registerLogger(std::unique_ptr /*logger*/); + + protected: + // singletion, hide constructor from the public + C10dLogger(std::string logDestination) + : logDestination_(std::move(logDestination)) {} + + // the name of the destination this logger should log to + std::string logDestination_; + + private: + static std::unique_ptr logger_; + static std::atomic registered_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logging.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logging.h new file mode 100644 index 0000000000000000000000000000000000000000..596e4686212800c6ac419ac9fd77519d90257160 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logging.h @@ -0,0 +1,52 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +#include +#include +#include + +namespace c10d::detail { + +enum class LogLevel { Trace, Debug, Info, Warning, Error }; + +TORCH_API bool isLogLevelEnabled(LogLevel level) noexcept; + +template +// NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) +std::string formatLogMessage(fmt::string_view fmt, T&&... args) { + return fmt::vformat(fmt, fmt::make_format_args(args...)); +} + +} // namespace c10d::detail + +#define C10D_ERROR(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Error)) \ + LOG(ERROR) << "[c10d] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_WARNING(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Warning)) \ + LOG(WARNING) << "[c10d] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_INFO(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Info)) \ + LOG(INFO) << "[c10d] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_DEBUG(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Debug)) \ + LOG(INFO) << "[c10d - debug] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_TRACE(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Trace)) \ + LOG(INFO) << "[c10d - trace] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_callback_work.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_callback_work.hpp new file mode 100644 index 0000000000000000000000000000000000000000..27c25dc8235c65258a8fa05d88eca27ceb87416a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_callback_work.hpp @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace c10d { + +// PythonCallbackWork is a subclass of Work that wraps a Python callback +// function that implements wait(). This allows asynchronous work to +// be integrated with Python code, enabling custom completion logic or +// post-processing in Python. +class PythonCallbackWork : public Work { + public: + explicit PythonCallbackWork(py::function callback); + + ~PythonCallbackWork() override; + + bool wait(std::chrono::milliseconds timeout) override; + + c10::intrusive_ptr getFuture() override; + + private: + py::function callback_; + c10::intrusive_ptr future_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_comm_hook.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_comm_hook.h new file mode 100644 index 0000000000000000000000000000000000000000..e6e985f87ced478c67147c8405303e682aac7403 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_comm_hook.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include + +namespace c10d { + +class TORCH_PYTHON_API PythonCommHook : public CommHookInterface { + public: + // Takes a state and a callable hook. The inputs are Python objects. + // The state is passed to the hook in runHook method, and it can be used to + // maintain and update any state information during the execution of the hook. + // The hook performs user-specified processing and returns a future indicating + // asynchronous communication of gradients. + PythonCommHook(py::object state, py::object hook) + : state_(std::move(state)), hook_(std::move(hook)) {} + + ~PythonCommHook() override; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; + + at::Tensor parseHookResult(const c10::IValue& result) override; + + private: + // Only needed for stateful communication. + py::object state_; + py::object hook_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization.h new file mode 100644 index 0000000000000000000000000000000000000000..08e8f2948af7ec89093fb8c7701bf7e2fe21a696 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +namespace torch::distributed::c10d::quantization { + +at::Tensor _float_to_bfloat16_cpu(const at::Tensor& input); +at::Tensor _bfloat16_to_float_cpu(const at::Tensor& input); + +} // namespace torch::distributed::c10d::quantization + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_gpu.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..9c21a8f5a07cfab89ce47f08d201abb1f7e1bb32 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_gpu.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +namespace torch::distributed::c10d::quantization { + +at::Tensor _float_to_bfloat16_cuda(const at::Tensor& input); +at::Tensor _bfloat16_to_float_cuda(const at::Tensor& input); + +} // namespace torch::distributed::c10d::quantization + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_utils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..a2a6a171ba92a0741a7cc6f1990987917ff1b657 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_utils.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +#include + +inline std::string torch_tensor_device_name(const at::Tensor& ten) { + return c10::DeviceTypeName(ten.device().type()); +} + +#define TENSOR_NDIM_EQUALS(ten, dims) \ + TORCH_CHECK( \ + (ten).ndimension() == (dims), \ + "Tensor '" #ten "' must have " #dims \ + " dimension(s). " \ + "Found ", \ + (ten).ndimension()) + +#define TENSOR_ON_CPU(x) \ + TORCH_CHECK( \ + !x.is_cuda(), \ + #x " must be a CPU tensor; it is currently on device ", \ + torch_tensor_device_name(x)) + +#define TENSOR_ON_CUDA_GPU(x) \ + TORCH_CHECK( \ + x.is_cuda(), \ + #x " must be a CUDA tensor; it is currently on device ", \ + torch_tensor_device_name(x)) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a8a5b08a4d2166a883ff047149a35abe0840a118 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer.hpp @@ -0,0 +1,607 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#endif + +namespace c10d { + +constexpr int kDefaultFirstBucketBytes = 1024 * 1024; +constexpr int kDefaultBucketBytesCap = 25 * 1024 * 1024; +// Collect runtime stats once for every kDDPRuntimeLoggingSampleRate iterations. +constexpr int kDDPRuntimeLoggingSampleRate = 100; + +// Forward declaration +class Logger; + +// Local accumulator type for a single bucket. +struct BucketAccumulator { + std::vector indices; + size_t size = 0; + size_t size_limit = 0; +}; + +class TORCH_API Reducer { + public: + // The constructor takes a list of variables (i.e. parameters) for this + // process's single model replica (as DDP assumes single-process + // single-device). The bucket assignment for this reducer, `bucket_indices`, + // is specified as a list of buckets, each of which is specified as a list of + // indices into the bucket's `variables` list. + explicit Reducer( + std::vector params, + std::vector> bucket_indices, + c10::intrusive_ptr process_group, + std::vector expect_sparse_gradients, + int64_t bucket_bytes_cap, + bool find_unused_parameters, + bool gradient_as_bucket_view, + std::unordered_map param_names, + int64_t first_bucket_bytes_cap, + bool skip_all_reduce_unused_params, + bool use_python_reducer); + + ~Reducer() noexcept(false); + + // To (re-)initialize bucket assignment, pass a list of buckets, each of + // which is specified by a list of indices in the bucket's `variables` list. + // This function performs validation that the variables within a bucket + // all live on the same device and have the same dimensionality. + void initialize_buckets(std::vector> bucket_indices); + + void autograd_hook(size_t index); + + // This function is called when the forward function has produced an output, + // and the user wishes to reduce gradients in the backwards pass. + // If they don't, and wish to accumulate gradients before reducing them, + // a call to this function can simply be omitted. + void prepare_for_backward(const std::vector& outputs); + + // Called at the beginning of forward() inside DistributedDataParallel, + // right now it captures the starting time of forward in each iteration. + void prepare_for_forward(); + + // Returns the relative time in nanoseconds when gradients were ready, + // with respect to the time `prepare_for_backward` was called. The + // vector is for parameters for a single model replica. + std::vector get_backward_stats() const { + return backward_stats_; + } + + // Registers a hook to the reducer. The hook is `CommHookInterface` + // type to allow both Python and CPP hooks. This function can only + // be called once before calling backward. + // Cannot combine with the call of `register_builtin_comm_hook`. + void register_comm_hook(std::unique_ptr iface); + + // Registers a built-in C++ comm hook to the reducer. This function can only + // be called once before calling backward. + // Cannot combine with the call of `register_comm_hook`. + void register_builtin_comm_hook(c10d::BuiltinCommHookType comm_hook_type); + + // Informs reducer that optimizer is running in backward, so gradients + // don't need to be copied from buckets as the optimizer would've already + // been applied. + void set_optimizer_in_backward() { + optim_in_backward_ = true; + } + + // Runs allreduce or installed communication hook given GradBucket instance. + c10::intrusive_ptr run_comm_hook( + GradBucket& grad_bucket); + + // Runs default allreduce hook. + c10::intrusive_ptr run_allreduce_hook( + GradBucket& grad_bucket); + + // Returns gradient buckets in sequential order of buckets_. This is the order + // in which buckets are reduced across processes. If return_zero_tensors=true, + // will return zero tensors of the same shape instead of the true tensors. + std::vector get_grad_buckets( + bool return_zero_tensors = true) const; + + // Rebuild buckets based on rebuilt_params_ and rebuilt_param_indices_ + // according to when tensors received grads in the backward pass. + // TODO this function makes broadcast communication call and + // could be overlapped with next forward() call, thus + // it could be async. Will make it async when rebuilding buckets for + // find_unused_parameters = true case, as we could rebuild buckets more than + // once for find_unused_parameters = true case, where subgraphs are trained + // and parameter indices order may change more frequently. + // For find_unused_parameters = false case, buckets are only rebuilt once, + // the performance cost is negligible. Returns true if the buckets were + // rebuilt. + bool rebuild_buckets(); + + void setSparseMetadata(std::map& metadata); + + // Install futures that should be awaited at end of backwards. Currently these + // are only used by user-defined custom buffer reduction hooks, but can be + // generalized to any user-originating futures that need to be awaited. + void install_futures( + const c10::List>& futs); + + // Returns true if we should rebuild buckets, else false. We only rebuild + // buckets once after the first iteration and never rebuild them if + // find_unused_parameters_. + inline bool should_rebuild_buckets() const { + return (static_graph_ || !find_unused_parameters_) && !has_rebuilt_bucket_; + } + + // Pushes all parameters to be rebuilt. + void push_rebuilt_params_for_all_indices(); + + // Creates and sets ForwardPassWorkHandle given a Work and the + // corresponding tensor being reduced. + void set_forward_pass_work_handle( + c10::intrusive_ptr forwardPassWorkHandle, + bool useStaticWorldSize); + + // Retrieve on-device tensors used to track locally unused parameters. It is + // a tensor where index i = 1 if the Variable with that index has been used. + at::Tensor get_local_used_map_on_device() const; + + // An function for users to set sample_rate of collecting + // runtime stats. The time stats will be recorded for the + // first 10 iterations, after 10 iterations time stats will be + // recorded once every "sample_rate" training iterations. + void set_ddp_runtime_logging_sample_rate(int sample_rate); + + // Specify the training graph is static. + void set_static_graph(); + + // Delay all reduce to be after all gradients' calculation is complete. + void delay_all_reduce(); + + void set_mixed_precision_param_dtype(c10::ScalarType dtype); + + // Weak reference to associated DDP logger. The reference is weak to avoid + // refcycle between reducer and logger. + void set_logger(std::weak_ptr logger); + + // When graph is not explicitly set by user as static and has unused + // parameters, this will return whether the graph has been static until the + // current iteration, which means unused params set has not changed. + bool ddp_graph_static(); + + // Removes autograd hooks registered by the Reducer on the model parameters. + void remove_autograd_hooks(); + + // Checks whether or not the reducer has finalized the current backward + // iteration. + void check_finalized(); + + // Updates the underlying process group used by DDP with the new process + // group. + void update_process_group( + c10::intrusive_ptr new_process_group); + + // Resets reducer state. + void reset_state(); + + protected: + // Forward declaration. + struct Bucket; + + void push_rebuilt_params(const size_t& index); + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + mutable std::mutex mutex_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::vector params_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + c10::intrusive_ptr<::c10d::ProcessGroup> process_group_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector expect_sparse_gradients_; + + std::vector> + grad_accumulators_; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes) + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unordered_map gradAccToVariableMap_; + std::vector>> + hooks_; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes) + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool expect_autograd_hooks_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool require_finalize_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t next_bucket_; + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool has_marked_unused_parameters_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const bool find_unused_parameters_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const bool gradient_as_bucket_view_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector unused_parameters_; + // Previous iteration's unused params, used for checking if unused parameters + // change between iterations. Only filled during the first backwards call. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector prev_iteration_unused_parameters_; + // Whether graph is static or not. When user does not explicitly set static + // graph, the only possible dynamism is set of unused parameters changing + // between iterations which is tracked by this flag. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool ddp_graph_static_{true}; + // Locally used parameter maps indicating if parameters are used locally + // during the current iteration or no_sync session if no_sync is on. + // Each map is a one-dim int32 tensor of number of parameters. These tensors + // are marked in autograd_hook to indicate the corresponding param has been + // used, and get allreduced in the end of backward step of current iteration + // or no_sync session for figuring out the globally unused parameters. + // + // local_used_map_: CPU tensor for bookkeeping locally used params + // local_used_map_dev_: dev tensor for reducing globally unused params + at::Tensor local_used_map_; + at::Tensor local_used_map_dev_; + // Indicate that reduction is done and D2H copy is done as well. + bool local_used_map_reduced_; + + // Weak pointer to associated DDP logger. + std::weak_ptr logger_; + // List of futures installed by Reducer::install_futures that should be + // awaited at the end of backwards pass. + std::optional>> + installed_futures_{std::nullopt}; + // Mixed precision parameter dtype for bucket type checking. + std::optional mixed_precision_param_dtype_{std::nullopt}; + + // Work handle for allreduce on local_used_map_ + c10::intrusive_ptr local_used_work_; + + void mark_variable_ready_dense(size_t variable_index); + + void mark_variable_ready_sparse(size_t variable_index); + + void mark_variable_ready(size_t variable_index); + + void mark_bucket_ready(size_t bucket_index); + + void finalize_bucket_dense(Bucket& bucket); + + void finalize_backward(); + + // Returns list of model parameters corresponding to the given bucket. + // bucket_index is a key to cache after buckets are rebuilt, after which this + // mapping never changes. + std::vector get_variables_for_bucket( + size_t bucket_index, + const Bucket& bucket) const; + + // Asserts that the reduction for the previous iteration has finished before + // rebuilding buckets or kicking off the next one. + void ensure_prior_reduction_finished(); + + // Broadcast rebuilt buckets from rank 0 to other ranks before initializing + // the buckets + void sync_bucket_indices(std::vector>& bucket_indices); + + // We'd like to use DistAutogradContext::GradCallback here but dist autograd + // doesn't exist under Windows. So we just directly use the concrete type but + // to preserve and enforce our original intent we do a static assert when dist + // autograd is available. + using GradCallback = std::function; +#ifndef _WIN32 + static_assert( + std::is_same_v< + GradCallback, + torch::distributed::autograd::DistAutogradContext::GradCallback>); +#endif + void runGradCallbackForVariable(at::Tensor& variable, const GradCallback& cb); + + // This function is called inside `initialize_buckets()`. It initializes both + // `bucket_views_in` and `bucket_views_out` with views for each variable's + // gradient into the bucket's flattened `gradients` tensor. Views serve as + // entry points to `copy_()` each grad's data in/out of the flattened + // `gradients` tensor. + void initialize_bucket_views(Bucket& bucket); + + // This function is called inside `finalize_backward`, it happens only if + // DDP communication hook was registered to recreate just bucket_views_out + // with the result of `future_work`. + void populate_bucket_views_out(Bucket& bucket, at::Tensor& tensor); + + // If gradient_as_bucket_view_ is false, after allreduce buckets, + // copy bucket results back to grads. + void copy_bucket_to_grad( + at::Tensor& variable, + Reducer::Bucket& bucket, + size_t intra_bucket_index, + bool global_unused); + // Check layout of grad and bucket_view before copying the grad to bucket. + void check_grad_layout(const at::Tensor& grad, const at::Tensor& bucket_view); + + // A bucket contains [1..N] gradients to be reduced, where the gradients + // have the same dtype and device. + // Coalescing gradients together before reducing can result in lower overhead + // and/or faster time to completion. Coalescing requires the constituent + // gradients to have the same dtype and device, and the resulting flattened + // tensor uses that common dtype and device. The flattened tensor is filled + // as the corresponding gradients are computed (triggered by autograd hooks), + // and the buckets are reduced in a predetermined order consistent across + // processes. + struct Bucket { + // Gradients of the bucket flattened into a 1-dimensional tensor + at::Tensor gradients; + + // Views into the `gradients` tensor for each individual gradient + // Each view is created with layout (size and stride) matching the + // gradient's expected layout (see the "Gradient Layout Contract" in + // torch/csrc/autograd/functions/accumulate_grad.h). + // `bucket_views_in[i].copy_(grad)` and `grad.copy_(bucket_views_out[i])` + // provide convenient ways to copy gradient data in/out of `gradients`, + // respectively. + // We keep both `bucket_views_in` and `bucket_views_out` because + // registering a DDP communication hook may re-initialize + // `bucket_views_out` with the value of the hook's `future_work` but we + // still need separate views into the bucket's original flattened gradient + // to copy in gradient data. + std::vector bucket_views_in; + std::vector bucket_views_out; + + // Variables whose gradients are held in this bucket + // We use refcounted tensors here so that we can easily unflatten the + // bucket's flattened `gradients` tensor into the participating variables + // after reduction has completed. + std::vector variables; + + // Per-variable offset/length into the flattened `gradients` tensor and + // the corresponding `GradBucket` instance for communication hooks + std::vector offsets; + std::vector lengths; + + // Per-variable sizes slicing into the bucket's `gradients` tensor + std::vector sizes_vec; + + // Number of gradients left to be computed before the bucket is ready to + // be reduced + size_t pending; + + // Global indices of participating variables in the bucket + std::vector variable_indices; + + // Future work handle for DDP communication hook + // If no hook is registered, a temporary vanilla allreduce hook is used. + c10::intrusive_ptr future_work; + + // if this bucket contains complex parameters + bool is_complex_bucket = false; + + // If this bucket should expect a single sparse gradient + // If `true`, then this implies that `bucket.variables.size() == 1`. + bool expect_sparse_gradient = false; + + // Sparse indices tensor + std::optional sparse_tensor_indices = std::nullopt; + + // TODO(@pietern) + // Memory copies from gradient tensors into the bucket are potentially + // done on different CUDA streams. We record an event for every copy + // so that we can synchronize with them prior to kicking off the reduction. + // std::vector events; + }; + + std::vector buckets_; + + // A variable locator locates a particular variable in the reducer's buckets + struct VariableLocator { + // Index of the bucket containing the variable in the `buckets_` vector + size_t bucket_index; + // Index of the variable in the bucket, which may be used consistently + // across `bucket_views_in`, `bucket_views_out`, `variables`, `offsets`, + // `lengths`, `sizes_vec`, and `variable_indices` in `Bucket` + size_t intra_bucket_index; + + VariableLocator() = default; + + VariableLocator(size_t bucket_index_, size_t intra_bucket_index_) + : bucket_index(bucket_index_), + intra_bucket_index(intra_bucket_index_) {} + }; + + // Map the index of a variable to its location in the bucket structure. + std::vector variable_locators_; + + // track the number of iterations to synchronize grads in training so far. + long num_iterations_; + // track distinct iteration of backward call. This is distinct from + // num_iterations_, for example in the case of multiple forward before + // backward. + long num_bwd_calls_; + // whether the first autograd hook for a distinct backward pass has been + // called. + bool first_autograd_hook_called_; + // track the number of buckets that have been ready for + // communication calls like allReduce or communication hooks. + int num_buckets_ready_; + // track the number of buckets that have been reduced. + int num_buckets_reduced_; + + // Timing information. + int64_t backward_compute_start_time_ = -1; + std::unique_ptr timer_; + + // We collect the relative timestamp of every gradient being ready + // when executing autograd. This can be used to derive a timeline of + // the point in time buckets were ready, or ideal bucket assignment/ordering. + std::vector backward_stats_; + + bool should_collect_runtime_stats(); + void record_forward_compute_start_time(); + void record_backward_compute_start_time(); + void record_backward_compute_end_time(); + void record_backward_comm_start_time(); + void record_backward_comm_end_time(); + + int get_ddp_runtime_logging_sample_rate(); + int ddp_runtime_logging_sample_rate_ = kDDPRuntimeLoggingSampleRate; + + bool is_multi_device_module_ = false; + + // Following variables are to help build dynamic bucket order + bool has_rebuilt_bucket_; + std::vector rebuilt_params_; + std::vector rebuilt_param_indices_; + const int64_t bucket_bytes_cap_; + +#ifndef _WIN32 + struct RpcContext { + using ContextPtr = torch::distributed::autograd::ContextPtr; + // The shared_ptr is to hold the context instance. + ContextPtr context_ptr_holder; + std::atomic context_ptr{nullptr}; + + void set(ContextPtr&& new_context_ptr); + }; + RpcContext rpc_context_; +#endif + + // A struct containing work handle and tensor for allreduce scheduled in + // forward pass, if applicable. + struct ForwardPassAllreduceWork { + c10::intrusive_ptr workHandle; + at::Tensor resultTensor; + // whether we should divide by the initial world_size or the no. of + // remaining DDP ranks. + bool useStaticWorldSize; + }; + + // Handle for the currently scheduled allreduce in the forward pass, if + // applicable. + ForwardPassAllreduceWork forwardPassWorkHandle_; + + // Division factor for reduction of gradients. + // Equal to the process group size, with an exception of handling uneven + // input. + int div_factor_; + + bool static_graph_; + + bool skip_all_reduce_unused_params_; + + // Key: size_t (index), Value: the number of times that a variable's + // autograd_hook() should be triggered before marking this variable's grad as + // ready for communication. Map will not change after 1st iteration. + std::unordered_map numGradHooksTriggeredMap_; + // Key: size_t (index), Value: the number of times that a variable's + // autograd_hook() are left to be triggered before marking this variable's + // grad as ready for communication. Map will change after 1st iteration to + // track a grad is ready for communication or not. + std::unordered_map numGradHooksTriggeredMapPerIteration_; + + private: + // reset counting for buckets before backward starts + void reset_bucket_counting(); + // search unused parameters beore backward starts + void search_unused_parameters( + const std::vector& outputs); + void set_divide_factor(); + // kick off all reduce for the ready bucket + void all_reduce_bucket(Bucket& bucket); + // kick off all reduce to local used map, it can help find global unused + // parameters + void all_reduce_local_used_map(); + // initialize locally used parameter maps + void initialize_local_used_map(); + // get current cuda stream + const c10::Stream get_current_stream(); + bool dynamic_graph_find_unused(); + bool static_graph_first_iteration(); + bool static_graph_after_first_iteration(); + + bool is_unused_bucket(Bucket& bucket); + bool should_skip_all_reduce_bucket(Bucket& bucket); + + // comm_hook_ is used to access the DDP communication hook if registered. + std::unique_ptr comm_hook_; + + // Sparse metadata contains the indices that will be used + // when calling into sparse allreduce. + // This is only used in the sparse allreduce collective calls + std::unique_ptr> sparse_metadata_; + + // Debug level setting. It is parsed once when Reducer is constructed, and + // remains the same across a single invocation of DDP training. + DebugLevel ddp_debug_level_; + // Mapping of variable index to fully qualified name of model to notify users + // about errors when certain parameters do not get gradient. + std::unordered_map param_names_; + // Variable indices stored sequentially in order of when the gradient is ready + // for the current backwards pass. + std::vector grad_ready_order_indices_; + // Bytes capacity of first bucket, can be configured by user + int64_t first_bucket_bytes_cap_; + // Per iteration set of parameter indices that have been marked ready. + std::unordered_set perIterationReadyParams_; + // Retrieves parameter names that have not been marked as ready as part of + // previous iteration. + std::vector getUnmarkedParamsForIteration(); + // Retrieves parameter indices that have not been marked as ready as part of + // previous iteration. + std::vector getUnmarkedParamIndicesForIteration(); + // Raises appropriate error if mark_variable_ready is called on the same + // variable twice, which is unexpected. + void checkAndRaiseMarkedTwiceError(size_t curVariableIndex); + // Retrieves parameter corresponding to the given VariableIndex. + at::Tensor& get_param_from_index(size_t index); + // Python reducer keeps C++ reducer initialized. To remove this flag, + // we need to refactor the DDP wrapper's initialization. + bool use_python_reducer_; + + // Cached bucket index to model parameter mapping. Populated after buckets + // are rebuilt after which this mapping is static. + mutable std::unordered_map> + cached_variables_for_bucket_; + + bool optim_in_backward_{false}; + friend class Logger; +}; + +// This is equivalent to take_tensors but returns indices into the +// tensor list argument for bucket assignment. Also, it is aware +// of device placement and will not allow buckets to span devices. +// The index of tensors[i] assigned to bucket is tensor_indices[i], +// when tensor_indices is empty, the index of tensors[i] assigned to +// bucket is i. +TORCH_API std::tuple>, std::vector> +compute_bucket_assignment_by_size( + const std::vector& tensors, + const std::vector& bucket_size, + const std::vector& expect_sparse_gradient = {}, + const std::vector& tensor_indices = {}, + const std::optional>& logger = {}); + +// Verify models across all processes are the same as model on rank 0 with +// respect to no. of params and matching dtype/size/layout. +TORCH_API void verify_params_across_processes( + const c10::intrusive_ptr& process_group, + const std::vector& params, + const std::optional>& logger); +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer_timer.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer_timer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7b4320325c5ef027f4ad47a789b67e5168ee7dd7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer_timer.hpp @@ -0,0 +1,86 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace c10d { +constexpr int kUnsetTime = -1; + +inline int64_t current_time_in_nanos() { + return c10::getTime(); +} + +class TORCH_API Timer { + private: + // The timestamp of forward call start time in each iteration. + int64_t forward_start_time = kUnsetTime; + // The timestamp of backward computation start and end time in each + // iteration. + int64_t backward_compute_start_time = kUnsetTime; + int64_t backward_compute_end_time = kUnsetTime; + // The timestamp of first communication call start time in each iteration. + int64_t backward_comm_start_time = kUnsetTime; + // The timestamp of last communication call end time in each iteration. + int64_t backward_comm_end_time = kUnsetTime; + + public: + enum class Event : uint8_t { + kForwardStart, + kBackwardComputeStart, + kBackwardComputeEnd, + kBackwardCommStart, + kBackwardCommEnd, + }; + + // Record the current event, i.e., mark it as having occurred now. Default + // CPU implementation. + virtual void record(Event event) { + getTimeRef(event) = current_time_in_nanos(); + } + + // Return the difference between when two events occurred, in nanoseconds. + // Or nullopt if one of them hasn't been recorded. + virtual std::optional measureDifference(Event start, Event end) = 0; + + virtual ~Timer() = default; + + // Return host-side timestamp, or nullopt if it has not yet been recorded. + std::optional getTimestamp(Event event) { + auto time = getTimeRef(event); + if (time == kUnsetTime) { + return std::nullopt; + } else { + return time; + } + } + + // Return host-side time member variable corresponding to the given event. + int64_t& getTimeRef(Event event) { + switch (event) { + case Event::kForwardStart: + return forward_start_time; + case Event::kBackwardComputeStart: + return backward_compute_start_time; + case Event::kBackwardComputeEnd: + return backward_compute_end_time; + case Event::kBackwardCommStart: + return backward_comm_start_time; + case Event::kBackwardCommEnd: + return backward_comm_end_time; + default: + TORCH_INTERNAL_ASSERT(false); + } + } +}; + +TORCH_DECLARE_TYPED_REGISTRY( + TimerRegistry, + c10::DeviceType, + Timer, + std::unique_ptr, + c10::Device); +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/sequence_num.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/sequence_num.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fa88322a6fa02adc0285afecf0fa128048f310f4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/sequence_num.hpp @@ -0,0 +1,71 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d { +constexpr int kUnsetSeqNum = 0; + +namespace { +constexpr int kByteOffset = 8; +} // namespace + +// Converts from int to char vec to write in store +template +inline std::vector toVec(uint64_t num, int numBytes) { + std::vector values; + // Read off bytes from right to left, pushing them into + // char array. + for (const auto i : c10::irange(numBytes)) { + uint8_t x = (num >> (kByteOffset * i)) & 0xff; + values.push_back(static_cast(x)); + } + return values; +} + +// Converts from char vec (such as from store read) to int. +template +inline uint64_t fromVec(const std::vector& values) { + uint64_t num = 0; + // Set each byte at the correct location on num + for (const auto i : c10::irange(values.size())) { + uint8_t x = static_cast(values[i]); + num |= (static_cast(x) << (kByteOffset * i)); + } + return num; +} + +class TORCH_API SequenceNum { + public: + SequenceNum(); + explicit SequenceNum(const uint64_t num); + // Retrieve num_. Will throw if not set. + uint64_t get() const; + // Increment num_. Will throw if not set. + void increment(); + // Increment num_ and return the old value. Will throw if not set. + uint64_t getAndIncrement(); + // Sets num_ + void set(const uint64_t num); + // Returns true if this SequenceNum is properly initialized with a value, else + // false. + bool isSet() const; + + SequenceNum& operator=(const SequenceNum& other); + + SequenceNum(const SequenceNum& other); + + private: + std::optional num_; + mutable std::mutex lock_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket.h new file mode 100644 index 0000000000000000000000000000000000000000..8d2ec93016c028f4d7c79410ab5c3a11cfa5264f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket.h @@ -0,0 +1,110 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace c10d::detail { + +class SocketOptions { + public: + SocketOptions& prefer_ipv6(bool value) noexcept { + prefer_ipv6_ = value; + + return *this; + } + + bool prefer_ipv6() const noexcept { + return prefer_ipv6_; + } + + SocketOptions& connect_timeout(std::chrono::milliseconds value) noexcept { + connect_timeout_ = value; + + return *this; + } + + std::chrono::milliseconds connect_timeout() const noexcept { + return connect_timeout_; + } + + // Sets the backoff policy to use for socket connect ops. + SocketOptions& connect_backoff(std::shared_ptr value) noexcept { + connect_backoff_ = std::move(value); + + return *this; + } + + const std::shared_ptr& connect_backoff() const noexcept { + return connect_backoff_; + } + + private: + bool prefer_ipv6_ = true; + std::chrono::milliseconds connect_timeout_{std::chrono::seconds{30}}; + std::shared_ptr connect_backoff_{ + std::make_shared(std::chrono::milliseconds(1000))}; +}; + +class SocketImpl; + +class Socket { + public: + // This function initializes the underlying socket library and must be called + // before any other socket function. + static void initialize(); + + static Socket listen(std::uint16_t port, const SocketOptions& opts = {}); + + static Socket listenFromFd(int fd, std::uint16_t expected_port); + + static Socket connect( + const std::string& host, + std::uint16_t port, + const SocketOptions& opts = {}); + + Socket() noexcept = default; + + Socket(const Socket& other) = delete; + + Socket& operator=(const Socket& other) = delete; + + Socket(Socket&& other) noexcept; + + Socket& operator=(Socket&& other) noexcept; + + ~Socket(); + + Socket accept() const; + + int handle() const noexcept; + + std::uint16_t port() const; + + bool waitForInput(std::chrono::milliseconds timeout); + + std::string repr() const; + + private: + explicit Socket(std::unique_ptr&& impl) noexcept; + + std::unique_ptr impl_; +}; +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket_fmt.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket_fmt.h new file mode 100644 index 0000000000000000000000000000000000000000..f333c8fa12d8ad4f321072b9379c24529c7d3507 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket_fmt.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +/* +This file should not be included from other .h files and only used in cpp files +as it exposes the underlying platform specific socket headers. +*/ + +#include + +#ifdef _WIN32 +#include + +#include +#include +#else +#include +#endif + +namespace c10d::detail { + +// Returns a human-readable representation of the given socket address. +std::string formatSockAddr(const struct ::sockaddr* addr, socklen_t len); + +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..480803752d9f3026d6b73a09cb1700a7c1678e5c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h @@ -0,0 +1,363 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900) && CUDART_VERSION >= 12010 +#define NVCC_SUPPORTS_MULTICAST 1 +#endif + +#include +#if defined(USE_ROCM) +#include +#endif +#if !defined(USE_ROCM) +#include +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600) +#include +#endif +#endif +#include + +namespace c10d::symmetric_memory { + +template +using Vec = at::native::memory::Vec; + +template +inline constexpr bool dependent_false = + at::native::memory::dependent_false; + +using at::native::memory::get_alignment; + +template +__device__ __forceinline__ uint32_t +cas(uint32_t* addr, uint32_t compare, uint32_t val) { +#if !defined(USE_ROCM) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600) + ::cuda::atomic_ref ref(*addr); + ref.compare_exchange_strong(compare, val, ::cuda::std::memory_order(Sem)); + return compare; +#elif defined(USE_ROCM) + __atomic_compare_exchange_n( + addr, &compare, val, false, static_cast(Sem), __ATOMIC_RELAXED); + return compare; +#else + CUDA_KERNEL_ASSERT(false); + return 0; +#endif +} + +__device__ __forceinline__ void trap() { +#if defined(USE_ROCM) + // abort() calls trap() under the covers. However, on ROCm, the trap is + // handled differently inside hip runtime. It collects a gpu core dump and + // causes linux kernel to create a core dump of the host application. + abort(); +#else + __trap(); +#endif +} + +__device__ __forceinline__ size_t global_timer_ns() { +#if defined(USE_ROCM) + static constexpr double MI300_FREQ_GHZ = 2.1; + return clock64() / MI300_FREQ_GHZ; +#else + size_t val; + asm volatile("mov.u64 %0, %globaltimer;" : "=l"(val) : : "memory"); + return val; +#endif +} + +constexpr size_t ns_per_ms = 1e6; + +template +__device__ __forceinline__ bool try_put_signal( + uint32_t* addr, + size_t timeout_ms) { + size_t deadline = global_timer_ns() + timeout_ms * ns_per_ms; + while (cas(addr, 0, 1) != 0) { + if (timeout_ms != 0 && global_timer_ns() > deadline) { + return false; + } + } + return true; +} + +template +__device__ __forceinline__ bool try_wait_signal( + uint32_t* addr, + size_t timeout_ms) { + size_t deadline = global_timer_ns() + timeout_ms * ns_per_ms; + while (cas(addr, 1, 0) != 1) { + if (timeout_ms != 0 && global_timer_ns() > deadline) { + return false; + } + } + return true; +} + +template +__device__ __forceinline__ void put_signal(uint32_t* addr) { + while (cas(addr, 0, 1) != 0) + ; +} + +template +__device__ __forceinline__ void wait_signal(uint32_t* addr) { + while (cas(addr, 1, 0) != 1) + ; +} + +// Synchronizes blocks with matching blockIdx across participating devices. +// Note: sync_remote_block itself is not a system level barrier/fence. It is a +// building block for expressing different synchronization patterns. +// +// Pattern 0: Ensures that all writes to symm_mem buffers from previous +// kernels across all devices are visible to the current kernel: +// +// sync_remote_blocks(...); +// __syncthreads(); +// +// Pattern 1: Ensures that all writes to symm_mem buffers from the current +// block are visible to all remote blocks with matching blockIdx: +// +// __syncthreads(); +// sync_remote_blocks(...); +// __syncthreads(); +// +// Pattern 2: Ensures that symm_mem buffers read by the current kernel are safe +// for writing by subsequent kernels across all devices. +// +// __syncthreads(); +// sync_remote_blocks(...); +template +__device__ __forceinline__ void sync_remote_blocks( + uint32_t** signal_pads, + size_t rank, + size_t world_size) { + if (threadIdx.x < world_size) { + auto target_rank = threadIdx.x; + if constexpr (hasPrevMemAccess) { + put_signal( + signal_pads[target_rank] + blockIdx.x * world_size + rank); + } else { + put_signal( + signal_pads[target_rank] + blockIdx.x * world_size + rank); + } + if constexpr (hasSubsequentMemAccess) { + wait_signal( + signal_pads[rank] + blockIdx.x * world_size + target_rank); + } else { + wait_signal( + signal_pads[rank] + blockIdx.x * world_size + target_rank); + } + } +}; + +template +struct MultimemLdReduce { + template + __device__ __inline__ Vec operator()(T* mc_ptr) { + static_assert(dependent_false); + } +}; + +template +__device__ __inline__ Vec multimem_ld_reduce_add(T* mc_ptr) { + MultimemLdReduce functor; + return functor.template operator()(mc_ptr); +} + +#if defined(USE_ROCM) || !defined(NVCC_SUPPORTS_MULTICAST) +#define SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(type, asm_type, acc_prec) \ + template <> \ + struct MultimemLdReduce { \ + template \ + __device__ __inline__ Vec operator()(type* mc_ptr) { \ + CUDA_KERNEL_ASSERT(false); \ + } \ + }; +#else +#define SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(type, asm_type, acc_prec) \ + template <> \ + struct MultimemLdReduce { \ + template \ + __device__ __inline__ Vec operator()(type* mc_ptr) { \ + Vec vec; \ + if constexpr (Alignment == 16) { \ + asm("multimem.ld_reduce.relaxed.sys.global.add" acc_prec \ + ".v4" asm_type " {%0,%1,%2,%3}, [%4];" \ + : "=r"(vec.u32[0]), \ + "=r"(vec.u32[1]), \ + "=r"(vec.u32[2]), \ + "=r"(vec.u32[3]) \ + : "l"(mc_ptr) \ + : "memory"); \ + } else if constexpr (Alignment == 8) { \ + asm("multimem.ld_reduce.relaxed.sys.global.add" acc_prec \ + ".v2" asm_type " {%0,%1}, [%2];" \ + : "=r"(vec.u32[0]), "=r"(vec.u32[1]) \ + : "l"(mc_ptr) \ + : "memory"); \ + } else if constexpr (Alignment == 4) { \ + asm("multimem.ld_reduce.relaxed.sys.global.add" acc_prec asm_type \ + " %0, [%1];" \ + : "=r"(vec.u32) \ + : "l"(mc_ptr) \ + : "memory"); \ + } \ + return vec; \ + } \ + }; +#endif + +SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(at::BFloat16, ".bf16x2", ".acc::f32"); +SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(float, ".f32", ""); + +template +__device__ __inline__ void multimem_st(T* mc_ptr, Vec& vec) { +#if defined(USE_ROCM) || !defined(NVCC_SUPPORTS_MULTICAST) + CUDA_KERNEL_ASSERT(false); +#else + if constexpr (Alignment == 16) { + asm("multimem.st.relaxed.sys.global.v4.f32 [%0], {%1,%2,%3,%4};" + : + : "l"(mc_ptr), + "r"(vec.u32[0]), + "r"(vec.u32[1]), + "r"(vec.u32[2]), + "r"(vec.u32[3]) + : "memory"); + } else if constexpr (Alignment == 8) { + asm("multimem.st.relaxed.sys.global.v2.f32 [%0], {%1,%2};" + : + : "l"(mc_ptr), "r"(vec.u32[0]), "r"(vec.u32[1]) + : "memory"); + } else if constexpr (Alignment == 4) { + asm("multimem.st.relaxed.sys.global.f32 [%0], %1;" + : + : "l"(mc_ptr), "r"(vec.u32) + : "memory"); + } else { + static_assert(dependent_false); + } +#endif +} + +template +__device__ __inline__ T add_bf16x2(T a, T b) { + static_assert(sizeof(T) == 4); +#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)) + CUDA_KERNEL_ASSERT(false); + return T{}; +#elif defined(USE_ROCM) + union bf2f { + float f; + __hip_bfloat16 bf[2]; + } _bf2f_a = {.f = 0}, _bf2f_b = {.f = 0}; + + //__hip_bfloat162 is a struct with two __hip_bfloat16 elements called x and y + // This typecasts input a and b as bfloat16 and maps to low bits of a float + // and does the addition in float + _bf2f_a.bf[1] = reinterpret_cast<__hip_bfloat162*>(&a)->x; + _bf2f_b.bf[1] = reinterpret_cast<__hip_bfloat162*>(&b)->x; + union f2bf { + float f; + __hip_bfloat16 bf[2]; + } _f2bf_res0, _f2bf_res1; + _f2bf_res0.f = _bf2f_a.f + _bf2f_b.f; + + // Same thing for y elements of __hip_bfloat162 + _bf2f_a.bf[1] = reinterpret_cast<__hip_bfloat162*>(&a)->y; + _bf2f_b.bf[1] = reinterpret_cast<__hip_bfloat162*>(&b)->y; + _f2bf_res1.f = _bf2f_a.f + _bf2f_b.f; + + // Put the two results together + __hip_bfloat162 rtn(_f2bf_res0.bf[1], _f2bf_res1.bf[1]); + return *reinterpret_cast(&rtn); +#else + auto res = __hadd2( + *reinterpret_cast<__nv_bfloat162*>(&a), + *reinterpret_cast<__nv_bfloat162*>(&b)); + return *reinterpret_cast(&res); +#endif +} + +template +__device__ __inline__ Vec add_vec( + const Vec& a, + const Vec& b) { + Vec c{}; + if constexpr (std::is_same_v) { + if constexpr (Alignment == 16) { + c.f32[0] = a.f32[0] + b.f32[0]; + c.f32[1] = a.f32[1] + b.f32[1]; + c.f32[2] = a.f32[2] + b.f32[2]; + c.f32[3] = a.f32[3] + b.f32[3]; + } else if constexpr (Alignment == 8) { + c.f32[0] = a.f32[0] + b.f32[0]; + c.f32[1] = a.f32[1] + b.f32[1]; + } else if constexpr (Alignment == 4) { + c.f32 = a.f32 + b.f32; + } else { + static_assert(dependent_false); + } + } else if constexpr (std::is_same_v) { + if constexpr (Alignment == 16) { + c.u32[0] = add_bf16x2(a.u32[0], b.u32[0]); + c.u32[1] = add_bf16x2(a.u32[1], b.u32[1]); + c.u32[2] = add_bf16x2(a.u32[2], b.u32[2]); + c.u32[3] = add_bf16x2(a.u32[3], b.u32[3]); + } else if constexpr (Alignment == 8) { + c.u32[0] = add_bf16x2(a.u32[0], b.u32[0]); + c.u32[1] = add_bf16x2(a.u32[1], b.u32[1]); + } else if constexpr (Alignment == 4) { + c.u32 = add_bf16x2(a.u32, b.u32); + } else { + static_assert(dependent_false); + } + } else { + static_assert(dependent_false); + } + return c; +} + +// With world_size specialization: perform balanced load from all peers before +// performing reduction. +template +__device__ inline std::enable_if_t<(k_world_size > 0), Vec> +load_and_reduce(T** ptrs, size_t rank, size_t world_size, size_t offset) { + Vec vecs[k_world_size]; +#pragma unroll k_world_size + for (size_t step = 0; step < k_world_size; ++step) { + size_t remote_rank = (rank + step) % k_world_size; + vecs[remote_rank] = + at::native::memory::ld_vec(ptrs[remote_rank] + offset); + } + auto acc = vecs[0]; +#pragma unroll k_world_size - 1 + for (size_t r = 1; r < world_size; ++r) { + acc = add_vec(acc, vecs[r]); + } + return acc; +} + +// Without world_size specialization: perform ordered (unbalanced) load and +// accumulate on each load. +template +__device__ inline std::enable_if_t<(k_world_size <= 0), Vec> +load_and_reduce(T** ptrs, size_t rank, size_t world_size, size_t offset) { + Vec acc{}; + for (size_t step = 0; step < world_size; ++step) { + auto vec = at::native::memory::ld_vec(ptrs[step] + offset); + acc = add_vec(acc, vec); + } + return acc; +} + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..caaa52d32408bffd084a772282c94e23af9d0a6b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp @@ -0,0 +1,157 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d::symmetric_memory { + +// Resource wrapper that owns a (vaddr, allocation handle) pair. Upon +// destruction, it unmaps the vaddr and releases the allocation handle. +struct AllocationRef : public c10::intrusive_ptr_target { + void* ptr; + HandleType handle; + size_t block_size; + int device_idx; + bool is_multicast; + + AllocationRef( + void* ptr, + HandleType handle, + size_t block_size, + int device_idx, + bool is_multicast = false); + + ~AllocationRef(); +}; + +// Forward declaration of CUDAPeerAllocInfo +class CUDAPeerAllocInfo; + +class CUDASymmetricMemory : public SymmetricMemory { + public: + // This is mostly a shallow copy that shares the pointer to + // `CUDAPeerAllocInfo` which corresponds to the base Block. The + // CUDASymmetricMemory handle is specified by the offset to the base ptr. + CUDASymmetricMemory( + const c10::intrusive_ptr& pai, + size_t offset); + + ~CUDASymmetricMemory() override {}; + + std::vector get_buffer_ptrs() override; + std::vector get_signal_pad_ptrs() override; + void** get_buffer_ptrs_dev() override; + void** get_signal_pad_ptrs_dev() override; + size_t get_buffer_size() override; + size_t get_offset() override; + + bool has_multicast_support() override; + void* get_multicast_ptr() override; + + void barrier(int channel, size_t timeout_ms) override; + void put_signal(int dst_rank, int channel, size_t timeout_ms) override; + void wait_signal(int src_rank, int channel, size_t timeout_ms) override; + + int get_rank() override; + int get_world_size() override; + c10::Device get_device() override; + bool world_within_direct_access() override; + + private: + int local_device_idx_; + int rank_; + int world_size_; + c10::intrusive_ptr pai_; + size_t offset_{0}; // in byte +}; + +// A class to hold the base pointers and signal pad pointers for a group of +// peers. One `CUDAPeerAllocInfo` object can be shared by multiple +// `CUDASymmetricMemory` objects when latter reside on the same allocation +// and rendezvous over the same group. (The `CUDASymmetricMemory` objects may +// have different offsets compared to the base address.) +class CUDAPeerAllocInfo : public c10::intrusive_ptr_target { + public: + CUDAPeerAllocInfo( + std::vector> alloc_refs, + std::vector buffers, + std::vector signal_pads, + HandleType mc_handle, + void* mc_addr, + size_t buffer_size, + int local_device_idx, + int rank, + int world_size); + + private: + std::vector> alloc_refs_; + std::vector buffers_; + std::vector signal_pads_; + HandleType mc_handle_; + void* mc_addr_; + size_t buffer_size_; + int local_device_idx_; + int rank_; + int world_size_; + void** buffers_dev_; + void** signal_pads_dev_; + + friend class CUDASymmetricMemory; +}; + +// Metadata associated with each allocation performed by +// `CUDASymmetricMemoryAllocator`. +struct Block : public c10::intrusive_ptr_target { + c10::intrusive_ptr alloc_ref; + int device_idx; + size_t block_size; + size_t buffer_size; + size_t signal_pad_offset; + std::optional default_group_name; + std::map> symm_mems; + + Block( + c10::intrusive_ptr alloc_ref, + int device_idx, + size_t block_size, + size_t buffer_size, + size_t signal_pad_offset, + const std::optional& group_name); +}; + +class CUDASymmetricMemoryAllocator : public SymmetricMemoryAllocator { + public: + void* alloc( + size_t size, + int device_idx, + const std::optional& group_name) override; + + void free(void* ptr) override; + size_t get_alloc_size(void* ptr) override; + c10::intrusive_ptr rendezvous( + void* ptr, + const std::optional& group_name) override; + bool has_multicast_support(int device_idx) override; + c10::DeviceType supported_device_type() override; + std::string name() override; + + private: + c10::intrusive_ptr find_block(void* ptr); + c10::intrusive_ptr find_block_covering(void* ptr, size_t& offset); + + std::shared_mutex mutex_; + std::unordered_map> ptr_to_block_; + c10::cuda::CUDACachingAllocator::Expandable_Segments_Handle_Type + handle_type_ = c10::cuda::CUDACachingAllocator:: + Expandable_Segments_Handle_Type::UNSPECIFIED; +}; + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2822011bdb192125ec44e3825ffff1f74aa18e55 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#if defined(USE_ROCM) +#include +#endif + +namespace c10d::symmetric_memory { + +// Covers NVL72 +constexpr int max_cuda_p2p_domain_size = 72; +// Maximum number of channels +constexpr int symm_max_nblocks = 32; + +// Maximally, a rank will need to sync with all other ranks, over all +// channels. Each signal is 32 bits, which is the minimum unit for atomic cas. +// Default signal pad size, can be overridden via set_signal_pad_size(). +constexpr size_t default_signal_pad_size = + symm_max_nblocks * max_cuda_p2p_domain_size * sizeof(uint32_t); + +#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED) +using HandleType = CUmemGenericAllocationHandle; +#elif defined(USE_ROCM) +using HandleType = hipMemGenericAllocationHandle_t; +#else +using HandleType = void*; +#endif + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0c6f8e725fdaea39eba18fcb303469cf9db1e643 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp @@ -0,0 +1,120 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace c10d { +namespace symmetric_memory { + +bool device_has_multicast_support(int device_idx); + +bool allow_overlapping_devices(); + +// Query environment variable to get the backend used for CUDA Symmetric Memory. +std::string getSymmMemBackendCUDA(); + +class IpcChannel { + public: + IpcChannel(); + ~IpcChannel(); + + void send_fd(int dst_pid, int fd); + int recv_fd(); + + std::vector all_gather_fds( + int rank, + const std::vector& pids, + int fd); + + int broadcast_fds( + int rank, + int src_rank, + const std::vector& pids, + int fd); + + private: + static std::string get_socket_name(int pid); + + std::string socket_name_; + int socket_; +}; + +// A set of store-based exchange methods with a preset prefix typically type of +// the SymmetricMemory. Most used as static instances at respective +// SymmetricMemory implementation files. +class StoreExchange { + public: + StoreExchange(const std::string& store_prefix) + : store_prefix_(store_prefix) {} + + // Put template function in header file so that compiler can easily access it. + template + std::vector all_gather( + const c10::intrusive_ptr& store, + int rank, + int world_size, + T val) { + static_assert(std::is_trivially_copyable_v); + + std::vector peer_keys; + peer_keys.reserve(world_size); + for (int r = 0; r < world_size; ++r) { + std::ostringstream oss; + oss << store_prefix_ << '/' << seq_id_ << '/' << r; + peer_keys.push_back(oss.str()); + } + ++seq_id_; + + { + std::vector payload( + reinterpret_cast(&val), + reinterpret_cast(&val) + sizeof(T)); + store->set(peer_keys[rank], payload); + } + + std::vector peer_vals; + peer_vals.reserve(world_size); + for (int r = 0; r < world_size; ++r) { + if (r == rank) { + peer_vals.push_back(val); + continue; + } + store->wait({peer_keys[r]}); + auto payload = store->get(peer_keys[r]); + TORCH_CHECK(payload.size() == sizeof(T)); + T peer_val{}; + std::memcpy(&peer_val, payload.data(), sizeof(T)); + peer_vals.push_back(peer_val); + } + return peer_vals; + } + + void barrier( + const c10::intrusive_ptr& store, + int rank, + int world_size) { + // TODO: implement an efficient one? + all_gather(store, rank, world_size, 0); + } + + private: + const std::string store_prefix_; + size_t seq_id_ = 0; +}; + +// Returns a pointer of virtual address that is mapped to the physical memory +// held by the handle. +void map_block( + void** ptr, + c10d::symmetric_memory::HandleType handle, + size_t size, + int device_idx); + +} // namespace symmetric_memory +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1fbf2d774c5543fcf496bb87f2599fbace5c46b7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +struct TORCH_API DMAConnectivity : c10::intrusive_ptr_target { + c10::DeviceType device_type; + std::string connection_type; + + // This is an NxN matrix representing the connectivity between N devices, + // where each element matrix[i][j] indicates the connectivity between device + // i and device j. A value of 0 denotes that there is no connection between + // device i and j. The meaning of non-zero values are specific to the + // connection type (e.g., for NVLink it represents the number of NVLinks). + std::vector> matrix; + + explicit DMAConnectivity( + c10::DeviceType device_type, + std::string connection_type, + std::vector> matrix); +}; + +struct DMAConnectivityDetector : c10::intrusive_ptr_target { + virtual c10::intrusive_ptr detect() = 0; + ~DMAConnectivityDetector() override = default; +}; + +C10_EXPORT void register_dma_connectivity_detector( + c10::DeviceType device_type, + const std::string& connection_type, + c10::intrusive_ptr detector); + +TORCH_API c10::intrusive_ptr detect_dma_connectivity( + c10::DeviceType device_type, + const std::string& connection_type); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..35c12d238f4817ffc6bcf1b52a4be4012ec27c95 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp @@ -0,0 +1,225 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10d::symmetric_memory { + +// SymmetricMemory represents symmetric allocations across a group of devices. +// The allocations represented by a SymmetricMemory object are accessible by +// all devices in the group. The class can be used for op-level custom +// communication patterns (via the get_buffer APIs and the synchronization +// primitives), as well as custom communication kernels (via the buffer and +// signal_pad device pointers). +// +// To acquire a SymmetricMemory object, each rank first allocates +// identical-sized memory via SymmetricMemoryAllocator::alloc(), then invokes +// SymmetricMemoryAllocator::rendezvous() on the memory to establish the +// association across peer buffers. The rendezvous is a one-time process, and +// the mapping between a local memory memory and the associated SymmetricMemory +// object is unique. +// +// NOTE [symmetric memory signal pad] +// Signal pads are P2P-accessible memory regions designated for +// synchronization. SymmetricMemory offers built-in synchronization primitives +// such as barriers, put_signal, and wait_signal, which are all based on signal +// pads. Users may utilize signal pads for their own synchronization logic, +// provided that the signal pads remain zero-filled following successful +// synchronization. +// +// NOTE [symmetric memory synchronization channel] +// Synchronization channels allow users to use a single SymmetricMemory object +// to perform isolated synchronizations on different streams. For example, +// consider the case in which two barriers are issued on two streams for +// different purposes. Without the concept of channels, we cannot guarantee the +// correctness of the barriers since signals issued from barrier on stream A +// can be received by the barrier on stream B. By specifying different channels +// for these two barriers, they can operate correctly in parallel. +class TORCH_API SymmetricMemory : public c10::intrusive_ptr_target { + public: + ~SymmetricMemory() override = default; + + virtual std::vector get_buffer_ptrs() = 0; + virtual std::vector get_signal_pad_ptrs() = 0; + + // get_buffer_ptrs_dev() and get_signal_pad_ptrs_dev() each return a pointer + // to a device array of size world_size, containing buffer pointers and + // signal pad pointers, respectively. + virtual void** get_buffer_ptrs_dev() = 0; + virtual void** get_signal_pad_ptrs_dev() = 0; + virtual size_t get_buffer_size() = 0; + size_t get_signal_pad_size(); + + virtual size_t get_offset() { + TORCH_CHECK(false, "NYI"); + } + + virtual bool has_multicast_support() = 0; + virtual void* get_multicast_ptr() = 0; + + at::Tensor get_buffer( + int rank, + c10::IntArrayRef sizes, + c10::ScalarType dtype, + int64_t storage_offset); + + at::Tensor get_signal_pad( + int rank, + c10::IntArrayRef sizes, + std::optional dtype = std::nullopt, + int64_t storage_offset = 0); + + at::Tensor get_remote_tensor( + int peer, + c10::IntArrayRef sizes, + c10::ScalarType dtype); + + virtual void barrier(int channel, size_t timeout_ms) = 0; + virtual void put_signal(int dst_rank, int channel, size_t timeout_ms) = 0; + virtual void wait_signal(int src_rank, int channel, size_t timeout_ms) = 0; + + virtual int get_rank() = 0; + virtual int get_world_size() = 0; + virtual c10::Device get_device() = 0; + + virtual const std::vector& get_rank_to_global_rank() { + TORCH_CHECK(false, "NYI"); + } + + virtual int* get_rank_to_global_rank_dev() { + TORCH_CHECK(false, "NYI"); + } + + // Returns true if *all* peers within the group are accessible via direct + // memory load and store. + virtual bool world_within_direct_access() { + TORCH_CHECK(false, "NYI"); + } +}; + +class SymmetricMemoryAllocator : public c10::intrusive_ptr_target { + public: + ~SymmetricMemoryAllocator() override = default; + + virtual void* alloc( + size_t size, + int device_idx, + const std::optional& group_name) = 0; + + virtual void free(void* ptr) = 0; + virtual size_t get_alloc_size(void* ptr) = 0; + virtual c10::intrusive_ptr rendezvous( + void* ptr, + const std::optional& group_name) = 0; + virtual bool has_multicast_support(int device_idx) = 0; + virtual c10::DeviceType supported_device_type() = 0; + virtual std::string name() = 0; +}; + +C10_EXPORT bool is_finalizing(); + +C10_EXPORT void register_allocator( + c10::DeviceType device_type, + c10::intrusive_ptr allocator); + +C10_EXPORT void register_availability( + const std::string& name, + c10::intrusive_ptr allocator); + +C10_EXPORT bool has_allocator(c10::DeviceType device_type); + +C10_EXPORT c10::intrusive_ptr get_allocator( + c10::DeviceType device_type); + +// Set a store for rendezvousing symmetric allocations on a group of devices +// identified by `group_name`. The concept of groups is logical; users can +// utilize predefined groups (e.g., a group of device identified by a +// ProcessGroup) or create custom ones. Note that a SymmetricMemoryAllocator +// backends might employ a more efficient communication channel for the actual +// rendezvous process and only use the store for bootstrapping purposes. +TORCH_API void set_group_info( + const std::string& group_name, + int rank, + int world_size, + c10::intrusive_ptr store); + +struct GroupInfo { + int rank; + int world_size; + c10::intrusive_ptr store; + // Note this field is not automatically populated by set_group_info(). If a + // SymmetricMemory implementation needs to use it, it must be populated by a + // call to exchange_global_ranks() first. + std::vector rank_to_global_rank; +}; + +C10_EXPORT GroupInfo& get_group_info(const std::string& group_name); + +// Identical to empty_strided, but allows symmetric memory access to be +// established for the allocated tensor via SymmetricMemory::rendezvous(). This +// function itself is not a collective operation. It invokes +// SymmetricMemoryAllocator::alloc() for the requested device under the hood. +// +// NOTE [symmetric memory persistent allocation] +// If an `alloc_id` is supplied, empty_strided_p2p will perform persistent +// allocation. This makes the function cache allocated memory and ensure that +// invocations with the same `alloc_id` receive tensors backed by the same +// memory address. For safety, if a previous persistent allocation is still +// active (i.e., the storage of the returned tensor is still alive), persistent +// allocations with the same `alloc_id` will fail. This determinism coupled +// with memory planning of communication buffers (e.g., by Inductor) allows +// communication algorithms to reliably reuse previously established remote +// memory access. +TORCH_API at::Tensor empty_strided_p2p( + c10::IntArrayRef size, + c10::IntArrayRef stride, + c10::ScalarType dtype, + c10::Device device, + const std::optional& group_name, + std::optional alloc_id); + +// Establishes symmetric memory access on tensors allocated via +// empty_strided_p2p() and empty_strided_p2p_persistent(). rendezvous() is a +// one-time process, and the mapping between a local memory region and the +// associated SymmetricMemory object is unique. Subsequent calls to +// rendezvous() with the same tensor, or tensors allocated with +// empty_strided_p2p_persistent() using the same alloc_id, will receive the +// cached SymmetricMemory object. +// +// The function has a collective semantic and must be invoked simultaneously +// from all rendezvous participants. +TORCH_API c10::intrusive_ptr rendezvous( + const at::Tensor& tensor, + const std::optional& group_name = std::nullopt); + +TORCH_API bool has_multicast_support( + c10::DeviceType device_type, + int device_idx); + +TORCH_API void set_backend(const std::string& name); + +TORCH_API std::optional get_backend(c10::Device device); + +// Get the current signal pad size for symmetric memory allocations. +// Returns the user-configured size if set, otherwise returns the default size. +TORCH_API size_t get_signal_pad_size(); + +// Set the signal pad size for future symmetric memory allocations. +// This must be called before any symmetric memory allocations are made. +// The size should be proportional to the number of blocks the user launches +// and the world size. +TORCH_API void set_signal_pad_size(size_t size); + +C10_EXPORT void register_mempool_allocator( + c10::DeviceType device_type, + std::shared_ptr allocator); + +TORCH_API std::shared_ptr get_mempool_allocator( + c10::Device device); + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/env.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/env.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ec10b3d7f4a84d66f1bb54ce582c56155d7f9257 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/env.hpp @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +namespace c10d::symmetric_memory { + +static int getenv_nblocks() { + static int num_blocks = -1; // Uninitialized + if (num_blocks == -1) { + auto str = c10::utils::get_env("TORCH_SYMMMEM_NBLOCKS"); + if (str.has_value()) { + num_blocks = std::stoi(str.value()); + } else { + num_blocks = -2; // Not set + } + } + return num_blocks; +} + +} // namespace c10d::symmetric_memory +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..28a07ea5936325b11d5f9295d3169e68e11e284f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp @@ -0,0 +1,96 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d::intra_node_comm { + +using namespace c10d::symmetric_memory; + +constexpr size_t kMaxDevices = 8; +constexpr size_t kDefaultBufferSize = 10ull * 1024 * 1024; + +using NvlMesh = std::array, kMaxDevices>; + +enum class Topology : uint8_t { + UNKNOWN = 0, + FULLY_CONNECTED = 1, +}; + +enum class AllReduceAlgo : uint8_t { + NONE = 0, + ONE_SHOT = 1, + TWO_SHOT = 2, +}; + +// NOTE: this class will be be removed soon in favor of SymmetricMemory +class TORCH_API IntraNodeComm : public c10::intrusive_ptr_target { + public: + IntraNodeComm( + c10::intrusive_ptr store, + size_t rank, + size_t worldSize, + std::optional bufferSize = std::nullopt); + + ~IntraNodeComm() override; + + static bool isEnabled(); + + /** + * Performs rendezvous. + * If rendezvous fails, the IntraNodeComm object will be in an invalid + * state and it is the caller's responsibility to dispose it. + */ + bool rendezvous(); + + /** + * Selects a AllReduceAlgo that we think will outperform nccl. + * Returns AllReduceAlgo::NONE if we don't think we can outperform nccl. + */ + AllReduceAlgo selectAllReduceAlgo(const at::Tensor& input); + + at::Tensor allReduce(const at::Tensor& input, AllReduceAlgo algo); + + private: + at::Tensor oneShotAllReduce( + const at::Tensor& input, + at::cuda::CUDAStream& stream); + + at::Tensor twoShotAllReduce( + const at::Tensor& input, + at::cuda::CUDAStream& stream); + + c10::intrusive_ptr store_; + size_t rank_; + size_t worldSize_; + size_t bufferSize_; + + /** + * Members initialized after rendezvous + */ + bool isInitialized_ = false; + int deviceIdx_{0}; + Topology topology_ = Topology::UNKNOWN; + void* symmetricMemoryPtr_ = nullptr; + c10::intrusive_ptr symmetricMemory_ = nullptr; +}; + +class IntraNodeCommWork : public c10d::Work { + public: + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + return true; + } +}; + +TORCH_API int64_t getIntraNodeCommUsageCounter(); + +bool isIntraNodeCommSupported(); +} // namespace c10d::intra_node_comm + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/nvshmem_team_manager.hpp b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/nvshmem_team_manager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6bf1ed77235da338723478a4868a1cb7a4379ba5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/nvshmem_team_manager.hpp @@ -0,0 +1,174 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +// Starting from NVSHMEM 3.3.9, nvshmem_host.h exists so that we can cleanly +// include only the nvshmem host library headers: +// #include +// It translates into the following two lines: +#include +#include +// For maximum compatibility, we use the "host/" style for now. + +namespace c10d::nvshmem_extension { + +// This corresponds to max nblocks +constexpr int MAX_N_TEAMS = 128; + +// A pool of teams for each group. These are duplicate teams. +using TeamPool = std::vector; + +// Manage all the team business. Singleton. +class TeamManager { + public: + // Constructor + explicit TeamManager(const c10::Device device) : device_(device) {} + + // Get single, global manager. + static TeamManager& get(const c10::Device device) { + static TeamManager manager(device); + TORCH_CHECK( + manager.device_ == device, + "Detected use of TeamManager on multiple devices. This is not supported."); + return manager; + } + + // Get a team for a group. + nvshmem_team_t get_team( + const std::string& group_name, + const std::vector& global_ranks) { + auto [team_pool, pool_updated] = + group_to_team_pool(group_name, global_ranks, 1); + // Return the fist available team + return team_pool[0]; + } + + // Get n teams for a group. + // The first element of the returned pair is the team pool on host side. + // The second element of the returned pair is the team pool on device side. + // This API must be call with a device guard. + std::pair get_n_teams( + const std::string& group_name, + const std::vector& global_ranks, + const int need_n) { + // A device guard is required for malloc and memcpy below + c10::cuda::CUDAGuard guard(device_); + // Get the team pool with the requested number of teams + auto [team_pool, pool_updated] = + group_to_team_pool(group_name, global_ranks, need_n); + // Check if the pool already exists in device memory + nvshmem_team_t* team_pool_dev = nullptr; + constexpr auto pool_bytes = sizeof(nvshmem_team_t) * MAX_N_TEAMS; + auto it = team_pool_devptrs_.find(group_name); + if (it == team_pool_devptrs_.end()) { + // If not, allocate a new pool in device memory + team_pool_dev = reinterpret_cast( + c10::cuda::CUDACachingAllocator::raw_alloc(pool_bytes)); + team_pool_devptrs_[group_name] = team_pool_dev; + } else { + team_pool_dev = it->second; + } + // Update the pool in device memory if host side pool is updated + if (pool_updated) { + TORCH_INTERNAL_ASSERT(team_pool.size() == MAX_N_TEAMS); + auto stream = at::cuda::getCurrentCUDAStream(); + C10_CUDA_CHECK(cudaMemcpyAsync( + team_pool_dev, + team_pool.data(), + pool_bytes, + cudaMemcpyHostToDevice, + stream)); + } + return std::make_pair(std::cref(team_pool), team_pool_dev); + } + + ~TeamManager() noexcept { + // Free the team pools in device memory + // Note that we do it in a best effort manner because the team pool is + // managed by a static TeamManager and the destruction order of static + // objects is undetermined. If the destructor is called after the CUDA + // context is destroyed, cudaFree would fail. + try { + // cudaFree generally implies a device synchronization, meaning it will + // block until all preceding CUDA operations on the device have completed + // before freeing the memory. Thus we don't need to worry about freeing + // the memory before CUDA kernels complete. + for (auto& [_, team_pool_dev] : team_pool_devptrs_) { + c10::cuda::CUDACachingAllocator::raw_delete(team_pool_dev); + } + } catch (...) { + // Ignore the error + std::cerr << "Failed to free the team pool in device memory, skipping\n"; + } + } + + private: + // Get the team pool for a group. If the pool doesn't exist, create it. If the + // pool exists but is not large enough, create more teams. + // The first element of the returned pair is the team pool on host side. + // The second element of the returned pair is a boolean indicating if the pool + // is updated. + std::pair group_to_team_pool( + const std::string& group_name, + const std::vector& global_ranks, + const int need_n) { + TORCH_CHECK(need_n < MAX_N_TEAMS, "Too many teams requested"); + // Guarding the NVSHMEM API calls below just to be safe + c10::cuda::CUDAGuard guard(device_); + + // Insert a new team pool if not exists + auto [it, inserted] = group_name_to_team_pool_.emplace( + group_name, TeamPool(MAX_N_TEAMS, NVSHMEM_TEAM_INVALID)); + auto& team_pool = it->second; + bool pool_updated = inserted; + + // Create new teams if what's requested is more than what we have + int stride = 0; // stride in globe, uninitialized + for (int i = 0; i < need_n; ++i) { + if (team_pool[i] != NVSHMEM_TEAM_INVALID) { + continue; + } + // Some checks before we create new teams + if (stride == 0) { // Check only once + TORCH_CHECK(global_ranks.size() > 1); + stride = global_ranks[1] - global_ranks[0]; + for (size_t r = 1; r < global_ranks.size(); ++r) { + TORCH_CHECK(global_ranks[r] - global_ranks[r - 1] == stride); + } + } + nvshmem_team_t team = NVSHMEM_TEAM_INVALID; + nvshmem_team_split_strided( + NVSHMEM_TEAM_WORLD, + global_ranks[0], + stride, + global_ranks.size(), + nullptr, + 0, + &team); + TORCH_CHECK(team != NVSHMEM_TEAM_INVALID, "Failed to create a new team"); + team_pool[i] = team; + pool_updated = true; + } + return std::make_pair(std::cref(team_pool), pool_updated); + } + + private: + // Device where the team manager is created + const c10::Device device_; + // A map from group name to team pool for that group. + std::unordered_map group_name_to_team_pool_; + // A map from group name to team pool array in device memory. + std::unordered_map team_pool_devptrs_; +}; + +} // namespace c10d::nvshmem_extension +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/agent_utils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/agent_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..25f915f21a7cc914b352ba4b8208e3ce4617d9a7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/agent_utils.h @@ -0,0 +1,47 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// All RPC peers should call into this function at the same time. Each peer +// provides its own id and name, and this function uses the given Store to +// gather global name-to-id mapping on all peers. +TORCH_API std::unordered_map collectNames( + ::c10d::PrefixStore store, + const worker_id_t selfId, + const std::string& selfName, + const int worldSize); + +// Ranks in dynamic RPC groups will initially call into this to establish the +// name-to-id mapping for the current peers in the group. The current rank will +// put its own worker info in the store and discover all the ranks that came +// before it. NOTE: This needs to be called with the Dynamic RPC group +// membership management token held. +TORCH_API std::unordered_map collectCurrentNames( + ::c10d::PrefixStore store, + const worker_id_t selfId, + const std::string& selfName); + +// Remove name from Store, used in dynamic RPC groups. +// NOTE: This needs to be called with the Dynamic RPC group +// membership management token held. +TORCH_API void removeCurrentName( + ::c10d::PrefixStore store, + const worker_id_t selfId, + const std::string& selfName); + +// This performs a synchronization of all call counts by using store. +// All RPC peers wait for others to join to exit at the same time. +TORCH_API int syncCallCount( + ::c10d::PrefixStore store, + const int worldSize, + int activeCalls = 0); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/message.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/message.h new file mode 100644 index 0000000000000000000000000000000000000000..f6e71490f27f885d1a2e4e897a29fe50a1cb0ea5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/message.h @@ -0,0 +1,198 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// An enum denoting common RPC errors to allow specific error handling for them. +// NOLINTNEXTLINE(performance-enum-size) +enum RPCErrorType { + UNKNOWN_ERROR = 0, /* Indicates that error type could not be parsed */ + TIMEOUT = 1, /* Indicates that the RPC has timed out */ + INTENTIONAL_FAILURE = 2 /* Deliberate failure, such as those injected by + FaultyAgent for testing */ +}; + +// The enum values are bitwise ORed with MessageType +// They are bit flags starting from 0x100 and should have +// value such as 0x100, 0x200, 0x400, 0x800, 0xF00, etc. +// NOLINTNEXTLINE(performance-enum-size) +enum MessageTypeFlags { + REQUEST_TYPE = 0x100, + RESPONSE_TYPE = 0x200, +}; + +// Message types must have values between 0x00 to 0xff +// NOLINTNEXTLINE(performance-enum-size) +enum MessageType { + // messages for dist.rpc on builtin operators + SCRIPT_CALL = 0x00 | MessageTypeFlags::REQUEST_TYPE, + SCRIPT_RET = 0x01 | MessageTypeFlags::RESPONSE_TYPE, + + // messages for dist.rpc on Python UDF + PYTHON_CALL = 0x02 | MessageTypeFlags::REQUEST_TYPE, + PYTHON_RET = 0x03 | MessageTypeFlags::RESPONSE_TYPE, + + // messages for dist.remote on builtin operators and Python UDF + SCRIPT_REMOTE_CALL = 0x04 | + MessageTypeFlags::REQUEST_TYPE, // A remote call on a builtin operator + PYTHON_REMOTE_CALL = + 0x05 | MessageTypeFlags::REQUEST_TYPE, // A remote call on a Python UDF + REMOTE_RET = + 0x06 | MessageTypeFlags::RESPONSE_TYPE, // Response for remote calls for + // UDF, builtin, or script + + // RRef related internal messages + SCRIPT_RREF_FETCH_CALL = + 0x07 | MessageTypeFlags::REQUEST_TYPE, // A UserRRef fetches value + // from owner + PYTHON_RREF_FETCH_CALL = + 0x08 | MessageTypeFlags::REQUEST_TYPE, // A UserRRef fetches + // value from owner + SCRIPT_RREF_FETCH_RET = 0x09 | + MessageTypeFlags::RESPONSE_TYPE, // An OwnerRRef sends ivalue to user + PYTHON_RREF_FETCH_RET = 0x0a | + MessageTypeFlags::RESPONSE_TYPE, // An OwnerRRef sends py::object to user + RREF_USER_DELETE = 0x0b | + MessageTypeFlags::REQUEST_TYPE, // A UserRRef tells the owner to deref + RREF_FORK_REQUEST = + 0x0c | MessageTypeFlags::REQUEST_TYPE, // A child UserRRef tells the owner + // about itself + RREF_CHILD_ACCEPT = + 0x0d | MessageTypeFlags::REQUEST_TYPE, // A child UserRRef tells parent + // that owner knows it + RREF_ACK = + 0x0e | MessageTypeFlags::RESPONSE_TYPE, // ACK to internal RRef messages + + // Messages with autograd info + FORWARD_AUTOGRAD_REQ = 0x0f | MessageTypeFlags::REQUEST_TYPE, + FORWARD_AUTOGRAD_RESP = 0x10 | MessageTypeFlags::RESPONSE_TYPE, + + // Messages to propagate gradients on the backward pass. + BACKWARD_AUTOGRAD_REQ = 0x11 | MessageTypeFlags::REQUEST_TYPE, + BACKWARD_AUTOGRAD_RESP = 0x12 | MessageTypeFlags::RESPONSE_TYPE, + + // Messages to tell workers to clean up their autograd context. + CLEANUP_AUTOGRAD_CONTEXT_REQ = 0x13 | MessageTypeFlags::REQUEST_TYPE, + CLEANUP_AUTOGRAD_CONTEXT_RESP = 0x14 | MessageTypeFlags::RESPONSE_TYPE, + + // Messages that tell workers to run requests with profiling enabled. + RUN_WITH_PROFILING_REQ = 0x15 | MessageTypeFlags::REQUEST_TYPE, + RUN_WITH_PROFILING_RESP = 0x16 | MessageTypeFlags::RESPONSE_TYPE, + + // Messages to support RRef.backward(). + RREF_BACKWARD_REQ = 0x17 | MessageTypeFlags::REQUEST_TYPE, + RREF_BACKWARD_RESP = 0x18 | MessageTypeFlags::RESPONSE_TYPE, + + // Other internal message types + EXCEPTION = 0x37 | MessageTypeFlags::RESPONSE_TYPE, + UNKNOWN = 0x3c +}; + +// A message to be sent/received by an RpcAgent. +// +// A Message object contains 4 fields: +// payload (std::vector): a binary chunk of data. +// tensors (std::vector): all tensors. Tensor data are not +// included in the payload, and it is up to the RpcAgent implementation +// to determine how to serialize them. This design is helpful for +// communicating super large tensors where serializing all the data at +// once leads to excessively large memory footprint. An implementation +// can then serialize and send tensors chunk-by-chunk, in the streaming +// fashion. +// type (MessageType): type of the message. +// id (int64_t): message id, this is used to match request and response. +// Other implementation can ignore it if they have their own +// ways to do matching. +// +// Layers above ``RpcAgent`` only converts ScriptCall, ScriptResp, PythonCall, +// and PythonResp into a Message, and it is up to the RpcAgent +// implementation to determine how to serialize a message. +class TORCH_API Message final : public torch::CustomClassHolder { + private: + // Keep these private in order to force users to go through make_intrusive and + // thus prevent creating a Message that's not held by an intrusive_ptr. + Message(); + + Message( + std::vector&& payload, + std::vector&& tensors, + MessageType type); + + Message( + std::vector&& payload, + std::vector&& tensors, + MessageType type, + int64_t id); + + friend c10::intrusive_ptr; + + public: + Message(const Message& other) = delete; + Message(Message&& other) = delete; + Message& operator=(Message const& rhs) = delete; + Message& operator=(Message&& rhs) = delete; + ~Message() override = default; + + // Destructively retrieves the payload. + std::vector&& movePayload() &&; + std::vector&& moveTensors() &&; + + std::vector& payload(); + const std::vector& payload() const; + std::vector& tensors(); + const std::vector& tensors() const; + MessageType type() const; + + bool isRequest() const; + bool isResponse() const; + bool isShutdown() const; + + // id is an optional field to match request/response. If an RpcAgent + // implementation is able to do the matching without using this id, it can be + // dropped during message serialization. + int64_t id() const; + void setId(int64_t id); + + std::vector> getStorages() const; + + private: + std::vector payload_; + std::vector tensors_; + MessageType type_ = MessageType::UNKNOWN; + int64_t id_ = -1; +}; + +// Create a response Message of type Exception. +// The exception string representation will be used as the message's payload. +// A message ID corresponding to the request that resulted in this response can +// be provided for matching requests/responses. +TORCH_API c10::intrusive_ptr createExceptionResponse( + const std::exception& e, + int64_t id); + +// Create a response Message of type Exception. +// The passed in string representation will be used as the message's payload. +// A message ID corresponding to the request that resulted in this response can +// be provided for matching requests/responses. +TORCH_API c10::intrusive_ptr createExceptionResponse( + const std::string& exceptionStr, + int64_t id); + +inline std::tuple< + c10::intrusive_ptr, + std::vector>> +withStorages(c10::intrusive_ptr message) { + auto storages = message->getStorages(); + return std::make_tuple(std::move(message), std::move(storages)); +} + +using JitFuture = c10::ivalue::Future; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h new file mode 100644 index 0000000000000000000000000000000000000000..d46a0e9be89a5957fd498fe2fbd17c7e02767e4b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace torch::distributed::rpc { +// All metrics are prefixed with the following key. +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) +constexpr char kRpcMetricsKeyPrefix[] = "torch.distributed.rpc."; +// APIs for logging time-series metrics for RPC-based distributed +// training. Implementations of this class should provide thread safety so that +// metrics can be logged from multiple threads without the user needing to +// coordinate serialization. +class RpcMetricsHandler { + public: + // Accumulates the metric value specified by the name for purposes of + // computing aggregate statistics over time. + virtual void accumulateMetric(const std::string& name, double value) = 0; + // Increment a count for the metric given by the name. + virtual void incrementMetric(const std::string& name) = 0; + virtual ~RpcMetricsHandler() = default; +}; + +// Configuration struct for metrics handling. +struct RpcMetricsConfig { + explicit RpcMetricsConfig(std::string handlerName, bool enabled) + : handlerName_(std::move(handlerName)), enabled_(enabled) {} + + // Handler name + std::string handlerName_; + // Whether metrics exporting should be enabled or not. + bool enabled_; +}; + +// A registry for different implementations of RpcMetricsHandler. Classes +// implementing the above interface should use this to register implementations. +TORCH_DECLARE_REGISTRY( + RpcMetricsHandlerRegistry, + torch::distributed::rpc::RpcMetricsHandler); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/remote_profiler_manager.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/remote_profiler_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..ba39ea8a02305f64f642b6927ee43b35459d10b8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/remote_profiler_manager.h @@ -0,0 +1,60 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { +extern const std::string REMOTE_PROFILING_KEY_PREFIX; + +class TORCH_API RemoteProfilerManager { + public: + // Retrieves the lazily-initialized RemoteProfilerManager singleton instance. + static RemoteProfilerManager& getInstance(); + // Sets the current, thread-local profiling key. + void setCurrentKey(std::string key); + // Returns whether the current profiling key is set. + bool isCurrentKeySet() const; + // Unsets the current, thread-local profiling key to allow other RPCs to reset + // it. + void unsetCurrentKey(); + // inserts a pair (globallyUniqueId, key) to an in-memory map. The + // corresponding ID is used in RPC deserialization to prefix remotely profiled + // events with the right key. + void saveRPCKey( + ProfilingId globallyUniqueId, + const std::string& rpcProfilingKey); + // Retrieves the profiling key corresponding to the given globallyUniqueId. + // Throws if it is not found. + std::string retrieveRPCProfilingKey(const ProfilingId& globallyUniqueId); + // Generates the next globally unique ID for profiling. + ProfilingId getNextProfilerId(); + // Retrieves the currently set thread-local profiling key. Throws if it is not + // set. + std::string& getCurrentProfilingKey(); + // erases the globallyUniqueId from the map. This can help save memory in the + // case that many RPCs are being profiled. + void eraseKey(const ProfilingId& globallyUniqueId); + + RemoteProfilerManager(const RemoteProfilerManager& other) = delete; + RemoteProfilerManager operator=(const RemoteProfilerManager& other) = delete; + RemoteProfilerManager(RemoteProfilerManager&&) = delete; + RemoteProfilerManager& operator=(RemoteProfilerManager&&) = delete; + + private: + RemoteProfilerManager(); + ~RemoteProfilerManager() = default; + local_id_t getNextLocalId(); + std::unordered_map + profiledRpcKeys_; + static thread_local std::optional currentThreadLocalKey_; + std::mutex mutex_; + local_id_t currentLocalId_; +}; +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/server_process_global_profiler.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/server_process_global_profiler.h new file mode 100644 index 0000000000000000000000000000000000000000..f86461f6f895bee8a46b54b9f57dd88b66362ad6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/server_process_global_profiler.h @@ -0,0 +1,134 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::distributed::rpc::profiler::processglobal { + +using namespace torch::autograd::profiler; + +// Process global profiler state. +// +// This class holds information about a profiling range, from "enable" to +// "disable". +// An instance of this ``State`` will be +// pushed into a global stack, so nested profiling range is supported. +// +// It has 2 members. +// One is ``autograd::profiler::ProfilerConfig``. It's set by user and +// will be copied to thread-local profiler state of RPC threads. +// The other is a container that aggregates recorded +// ``autograd::profiler::Event``s from all thread-local profilers on RPC +// threads. +class State { + public: + explicit State(ProfilerConfig config) : config_(std::move(config)) {} + ~State() = default; + + const ProfilerConfig& config() const { + return config_; + } + + void pushResult(thread_event_lists result) { + std::unique_lock lock(resultsMutex_); + + // NB: When a thread wants to push an entry into the this container, + // main control logic might have exited the process-global profile range. + results_.emplace_back(std::move(result)); + } + + std::vector results(); + + private: + // Each result comes from a profile range. In each profile range, there is a + // "__profiler_start" marker event that all following events calculate time + // relative to it, so it's required to call + // parse_cpu_trace(result) for results of all profile range. + std::mutex resultsMutex_; + std::vector results_; + const ProfilerConfig config_ = ProfilerConfig(ProfilerState::Disabled); +}; + +class StateStackEntry; + +#if defined(__MACH__) +// Compiler error: 'shared_timed_mutex' is unavailable: introduced in +// macOS 10.12 +using mutexType = std::mutex; +// Compiler error: 'shared_lock' is unavailable: introduced in +// macOS 10.12 +using rLockType = std::unique_lock; +using wLockType = std::unique_lock; +#else +using mutexType = std::shared_timed_mutex; +using rLockType = std::shared_lock; +using wLockType = std::unique_lock; +#endif + +// This is the global stack of ``State``s. +TORCH_API extern std::shared_ptr currentStateStackEntryPtr; +TORCH_API extern mutexType currentStateStackEntryMutex; + +// This class is used to implement a stack of ``State``s. +// It has 2 members. +// One is `prevPtr`, a shared_ptr pointing to previous element in the +// stack. +// The other is ``statePtr``, a shared_ptr pointing to ``State``. +class StateStackEntry { + public: + StateStackEntry( + std::shared_ptr prevPtr, + std::shared_ptr statePtr) + : prevPtr_(std::move(prevPtr)), statePtr_(std::move(statePtr)) {} + + static void pushRange(std::shared_ptr profilerProcessGlobalStatePtr); + static std::shared_ptr popRange(); + + static std::shared_ptr current() { + rLockType rlock(currentStateStackEntryMutex); + + return currentStateStackEntryPtr; + } + + std::shared_ptr prevPtr() const { + return prevPtr_; + } + + std::shared_ptr statePtr() const { + return statePtr_; + } + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::shared_ptr prevPtr_{nullptr}; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::shared_ptr statePtr_{nullptr}; +}; + +// Push the result to ``State``s of current profile range and recursively outer +// profile ranges. +TORCH_API void pushResultRecursive( + std::shared_ptr stateStackEntryPtr, + const thread_event_lists& result); + +// User-facing API. +// +// Enter a server-side process-global profiling range. +// Profiling range can be neste, so it's ok to call this API for multiple +// times. This enables all RPC threads running server-side request callbacks. +TORCH_API void enableServer(const ProfilerConfig& new_config); +// +// Exit a server-side process-global profiling range. +// Profiling range can be neste, so it's possible that profiler is still on +// after calling this API. +// This enables all RPC threads running server-side request callbacks. +TORCH_API std::vector disableServer(); + +} // namespace torch::distributed::rpc::profiler::processglobal + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/py_rref.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/py_rref.h new file mode 100644 index 0000000000000000000000000000000000000000..c5735e844b6e1f62dbfc779a76db78857e111593 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/py_rref.h @@ -0,0 +1,86 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::rpc { + +// NOLINTNEXTLINE(performance-enum-size) +enum RRefProxyType { RPC_SYNC, RPC_ASYNC, REMOTE }; + +// Python wrapper of an RRef shared_ptr that supports Python +// pickle and unpickle. +class PYBIND11_EXPORT PyRRef { + public: + // The first ctor can only be called while holding GIL. See its implementation + // for more explanations. + explicit PyRRef(const py::object& value, const py::object& type_hint); + explicit PyRRef(c10::intrusive_ptr rref); + PyRRef(const PyRRef&) = default; + ~PyRRef(); + + bool isOwner() const; + bool confirmedByOwner() const; + WorkerInfo owner() const; + std::string ownerName() const; + py::object toHere( + const float timeoutSeconds = + torch::distributed::rpc::kUnsetRpcTimeout) const; + py::object localValue() const; + std::string str() const; + py::tuple pickle() const; + static PyRRef unpickle(const py::tuple& t); + c10::IValue toIValue() const; + // Future that is associated with the creation of this RRef on the remote end. + // This is only used to get the future corresponding to the rref for profiling + // use cases. + c10::intrusive_ptr getFuture() const; + // Keeps track of the future responsible for profiling owner creation + // acknowledgement + c10::intrusive_ptr getProfilingFuture() const; + // Sets the future responsible for profiling owner creation acknowledgement. + // This future is set from python to be a future that returns when profiling + // callbacks have been run. + void setProfilingFuture(c10::intrusive_ptr profilingFuture); + + // create a proxy on this RRef, which can be used to launch RPC on the owner + // of this RRef to run functions on the object referenced by this RRef. + py::object createRRefProxy( + const RRefProxyType& mode, + float timeoutSeconds = rpc::kUnsetRpcTimeout) const; + + // get the type of the data object referenced by this RRef. Timeout argument + // is only used in the first invocation of this function as an argument to the + // RPC to the owner node of the RRef. + py::object getRRefType( + float timeout = rpc::kUnsetRpcTimeout, + bool blocking = true); + + // Run the backward pass with the RRef as the root. + void backward(int64_t autogradContextId, bool retainGraph); + + // Helper static function to run backward on a given rref. + static void backward( + int64_t autogradContextId, + bool retainGraph, + const c10::intrusive_ptr& rref); + + // Specialization of backward if the rref is an OwnerRRef. + static void backwardOwnerRRef( + int64_t autogradContextId, + bool retainGraph, + IValue value); + + private: + c10::intrusive_ptr rref_; + std::optional> profilingFuture_; + std::optional type_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_call.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_call.h new file mode 100644 index 0000000000000000000000000000000000000000..ea339cae11b4dedb5a2aefa2b702443bc53708a4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_call.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// RPC call representing calling a Python function over RPC. +class TORCH_API PythonCall final : public RpcCommandBase { + public: + PythonCall(SerializedPyObj&& serializedPyObj, bool isAsyncExecution); + + c10::intrusive_ptr toMessageImpl() && override; + + static std::unique_ptr fromMessage(const Message& message); + + const SerializedPyObj& serializedPyObj() const; + + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + private: + SerializedPyObj serializedPyObj_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_functions.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..ba69781bdcb0a200b52d30da80cabeb52ff8608d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_functions.h @@ -0,0 +1,71 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Converts an internal ivalue::Future of Message into a user-facing +// ivalue::Future of py::object type by creating a new ivalue::Future and call +// its markCompleted as a callback in the given ivalue::Future. +// If hasValue is true, the Message will be converted into a py::object and then +// wrap it with an IValue. If hasValue is false, this ivalue::Future is only +// used for signaling and launching callbacks. In this case, the message will be +// discarded and then set the ivalue::Future using an empty IValue or the given +// FutureError if there is an error. +c10::intrusive_ptr toPyJitFuture( + const c10::intrusive_ptr& messageJitFuture, + bool hasValue = true); + +c10::intrusive_ptr pyRpcBuiltin( + const WorkerInfo& dst, + const std::string& opName, + const py::args& args, + const py::kwargs& kwargs, + const float rpcTimeoutSeconds); + +c10::intrusive_ptr pyRpcPythonUdf( + const WorkerInfo& dst, + std::string& pickledPythonUDF, + std::vector& tensors, + const float rpcTimeoutSeconds, + const bool isAsyncExecution); + +c10::intrusive_ptr pyRpcTorchscript( + const std::string& dstWorkerName, + const std::string& qualifiedNameStr, + const py::tuple& argsTuple, + const py::dict& kwargsDict, + const float rpcTimeoutSeconds, + const bool isAsyncExecution); + +PyRRef pyRemoteBuiltin( + const WorkerInfo& dst, + const std::string& opName, + const float rpcTimeoutSeconds, + const py::args& args, + const py::kwargs& kwargs); + +PyRRef pyRemotePythonUdf( + const WorkerInfo& dst, + std::string& pickledPythonUDF, + std::vector& tensors, + const float rpcTimeoutSeconds, + const bool isAsyncExecution); + +PyRRef pyRemoteTorchscript( + const std::string& dstWorkerName, + const std::string& qualifiedNameStr, + const float rpcTimeoutSeconds, + const bool isAsyncExecution, + const py::args& args, + const py::kwargs& kwargs); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_remote_call.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_remote_call.h new file mode 100644 index 0000000000000000000000000000000000000000..b34cb40349850c4c643e9e5d899b53293f220a2b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_remote_call.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +namespace torch::distributed::rpc { + +class TORCH_API PythonRemoteCall : public RpcCommandBase { + public: + PythonRemoteCall( + SerializedPyObj&& serializedPyObj, + at::IValue retRRefId, + at::IValue retForkId, + const bool isAsyncExecution); + + inline const SerializedPyObj& serializedPyObj() const { + return serializedPyObj_; + } + + inline const at::IValue& retRRefId() const { + return retRRefId_; + } + + inline const at::IValue& retForkId() const { + return retForkId_; + } + + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + SerializedPyObj serializedPyObj_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::IValue retRRefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::IValue retForkId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_resp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..cc47fbe631a219bcd16a847d1bb849fa812068cf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_resp.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// RPC call representing the response of a Python UDF over RPC. +class TORCH_API PythonResp final : public RpcCommandBase { + public: + explicit PythonResp(SerializedPyObj&& serializedPyObj); + + c10::intrusive_ptr toMessageImpl() && override; + + static std::unique_ptr fromMessage(const Message& message); + + const SerializedPyObj& serializedPyObj() const; + + private: + SerializedPyObj serializedPyObj_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_rpc_handler.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_rpc_handler.h new file mode 100644 index 0000000000000000000000000000000000000000..8de1c7d7592e448f541f2a3f03b1a0f9b9a5f2ec --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_rpc_handler.h @@ -0,0 +1,134 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Singleton class provides interface to execute python UDF remote call +// and deserialize the returned results by running python function +// in internal_rpc_utilities. +// The singleton object is constructed at first when RPC agent is +// constructed, where the python function in +// torch/distributed/internal_rpc_utils.py are imported only once. +class PYBIND11_EXPORT PythonRpcHandler { + public: + struct RRefProxyFunctions { + py::object rrefProxyCtor_; + py::object rpcSync_; + py::object rpcAsync_; + py::object remote_; + }; + + struct RRefTypeFunctions { + py::object onOwner_; + py::object onUser_; + }; + + static PythonRpcHandler& getInstance(); + + // Run a pickled Python UDF and return the result py::object + py::object runPythonUdf(const py::object& pythonUdf); + + // Serialized a py::object into a string + SerializedPyObj serialize(const py::object& obj); + + // Deserialize a string into a py::object + py::object deserialize(const SerializedPyObj& serializedObj); + + // Check if obj is RemoteException, then throw it + void handleException(const py::object& obj); + // Alternative if the caller is already holding the GIL. + void handleExceptionGILHeld(const py::object& obj); + // Check if obj is an RemoteException instance. + bool isRemoteException(const py::object& obj); + + // Explicitly clean up py::objects to avoid segment faults when + // py::objects with CPython are cleaned up later at program exit + // See similar issues reported https://github.com/pybind/pybind11/issues/1598 + // and https://github.com/pybind/pybind11/issues/1493 + // Our local tests also caught this segment faults if py::objects are cleaned + // up at program exit. The explanation is: CPython cleans up most critical + // utilities before cleaning up PythonRpcHandler singleton, so when + // PythonRpcHandler singleton cleans up py::objects and call dec_ref(), it + // will crash. + // The solution is to clean up py::objects earlier when Rpc agent join(). + // Be note that py::objects can not be cleaned up when Rpc agent is destroyed + // as well, as Rpc agent is global variable and it will have same issue as + // PythonRpcHandler. + void cleanup(); + + std::shared_ptr jitCompilationUnit(); + + // Parse the string to recover the jit_type, this is used for RRef python + // pickling/unpickling type recovery. The type string inference rule is as + // follows: + // 1. first try to parse if this is primitive types. + // i.e. TensorType, IntType, PyObjectType, etc. + // 2. if not primitive type, we query the python_cu to see if it is a + // class type or interface type registered in python + // We use a ScriptTypeParser instance with custom PythonTypeResolver + // to resolve types according to the above rules. + TypePtr parseTypeFromStr(const std::string& typeStr); + + // Return a set of Python functions for RRef helpers. + const RRefProxyFunctions& getRRefProxyFunctions() const; + + // Return a set of Python functions to retrieve the type of the object + // referenced by a given RRef. + const RRefTypeFunctions& getRRefTypeFunctions() const; + + PythonRpcHandler(const PythonRpcHandler&) = delete; + PythonRpcHandler& operator=(const PythonRpcHandler&) = delete; + PythonRpcHandler(PythonRpcHandler&&) = delete; + PythonRpcHandler& operator=(PythonRpcHandler&&) = delete; + + private: + void init(); + PythonRpcHandler(); + ~PythonRpcHandler() = default; + + // Ref to `torch.distributed.rpc.internal._run_function`. + py::object pyRunFunction_; + + // Ref to `torch.distributed.rpc.internal.serialize`. + py::object pySerialize_; + + // Ref to `torch.distributed.rpc.internal.deserialize`. + py::object pyDeserialize_; + + // Ref to 'torch.distributed.rpc.internal._handle_exception' + py::object pyHandleException_; + + // Python functions for RRef proxy + RRefProxyFunctions rrefProxyFunctions_; + + // Ref to 'torch.distributed.rpc.api._rref_typeof_on_' + RRefTypeFunctions rrefTypeFunctions_; + + // Shared ptr to python compilation unit in jit, it is constructed in python + // side (see _python_cu = torch._C.CompilationUnit() in jit/__init__.py) + // and imported in C++ (see get_python_cu() in + // csrc/jit/python/pybind_utils.h). We import the compilation unit here only + // once for less cost and thread safety. + std::shared_ptr jitCompilationUnit_; + + // jit type parser to parse type_str back to TypePtr for RRef type + // recovery when pickling and unpickling RRef + std::shared_ptr typeParser_; + + // Indicates whether or not we have properly initialized the handler. + bool initialized_; + + // Lock to protect initialization. + std::mutex init_lock_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback.h new file mode 100644 index 0000000000000000000000000000000000000000..f0b0a52371b27e14b530df19a47491337c856d63 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc { + +// Functor which is invoked to process an RPC message. This is an abstract class +// with some common functionality across all request handlers. Users need to +// implement this interface to perform the actual business logic. +class TORCH_API RequestCallback { + public: + // Invoke the callback. + c10::intrusive_ptr operator()( + Message& request, + std::vector streams) const; + + virtual ~RequestCallback() = default; + + protected: + // RpcAgent implementation should invoke ``RequestCallback`` to process + // received requests. There is no restriction on the implementation's + // threading model. This function takes an rvalue reference of the Message + // object. It is expected to return the future to a response message or + // message containing an exception. Different rpc agent implementations are + // expected to ensure delivery of the response/exception based on their + // implementation specific mechanisms. + virtual c10::intrusive_ptr processMessage( + Message& request, + std::vector streams) const = 0; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_impl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..7bea64f4b19c05dec4201eead386978cf6a32c64 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_impl.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +class TORCH_API RequestCallbackImpl : public RequestCallbackNoPython { + public: + std::unique_ptr deserializePythonRpcCommand( + std::unique_ptr rpc, + const MessageType& messageType) const override; + + c10::intrusive_ptr processPythonCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processScriptCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processScriptRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processPythonRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processPythonRRefFetchCall( + RpcCommandBase& rpc) const override; + + void handleRRefDelete(c10::intrusive_ptr& rref) const override; + + c10::intrusive_ptr processRpcWithErrors( + RpcCommandBase& rpc, + const MessageType& messageType, + const std::vector& streams) const override; + + bool cudaAvailable() const override; + + c10::intrusive_ptr processRRefBackward( + RpcCommandBase& rpc) const override; + + // Helpers to run user-defined functions, operators and other computations. + + c10::intrusive_ptr runJitFunction( + const c10::QualifiedName& name, + std::vector& stack, + const std::vector& streams, + bool isAsyncExecution) const; + + c10::intrusive_ptr runPythonFunction( + const py::object& function, + const std::vector& streams, + bool isAsyncExecution) const; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_no_python.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_no_python.h new file mode 100644 index 0000000000000000000000000000000000000000..e8632437b14fefed1c3de5c8ea390059c98c87a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_no_python.h @@ -0,0 +1,120 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// RequestCallback implementation with no Python dependencies. +class TORCH_API RequestCallbackNoPython : public RequestCallback { + public: + c10::intrusive_ptr processMessage( + Message& request, + std::vector streams) const override; + + protected: + virtual std::unique_ptr deserializePythonRpcCommand( + std::unique_ptr rpc, + const MessageType& messageType) const; + + virtual c10::intrusive_ptr processScriptCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + virtual c10::intrusive_ptr processPythonCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr assignOwnerRRef( + const RRefId& rrefId, + const RRefId& forkId, + const c10::intrusive_ptr& valueFuture) const; + + virtual c10::intrusive_ptr processScriptRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + virtual c10::intrusive_ptr processPythonRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr retrieveOwnerRRef(const RRefId& rrefId) const; + + c10::intrusive_ptr processScriptRRefFetchCall( + RpcCommandBase& rpc) const; + + virtual c10::intrusive_ptr processPythonRRefFetchCall( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRRefUserDelete( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRRefChildAccept( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRRefForkRequest( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processForwardAutogradReq( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr processBackwardAutogradReq( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr processCleanupAutogradContextReq( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRunWithProfilingReq( + RpcCommandBase& rpc) const; + + virtual void handleRRefDelete(c10::intrusive_ptr& rref) const; + + c10::intrusive_ptr processRpc( + RpcCommandBase& rpc, + const MessageType& messageType, + const std::vector& streams) const; + + virtual c10::intrusive_ptr processRpcWithErrors( + RpcCommandBase& rpc, + const MessageType& messageType, + const std::vector& streams) const; + + c10::intrusive_ptr handleError( + const std::exception& e, + const MessageType messageType, + int64_t messageId) const; + + virtual bool cudaAvailable() const; + + virtual c10::intrusive_ptr processRRefBackward( + RpcCommandBase& rpc) const; + + // Helpers to run user-defined functions, operators and other computations. + + c10::intrusive_ptr runJitOperator( + const jit::Operator& op, + std::vector& stack, + const std::vector& streams) const; + + // Helpers to convert various kinds of objects into already-completed futures. + + c10::intrusive_ptr asFuture(IValue value, TypePtr type) const; + + c10::intrusive_ptr asFuture( + c10::intrusive_ptr message) const; + + c10::intrusive_ptr asFuture(std::exception_ptr err) const; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc.h new file mode 100644 index 0000000000000000000000000000000000000000..9e0ced778117bcf6dc19a32b2a7fc90749172e88 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_agent.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..7ce5590994cca2b262f3015e183d36c616f69137 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_agent.h @@ -0,0 +1,345 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +using DeviceMap = std::unordered_map; + +// Default RPC timeout +constexpr float kDefaultRpcTimeoutSeconds = 60; +// Unset RPC timeout. This is the value agent::send() will have if user does not +// pass in a specific timeout, and indicates that we must use the default +// timeout for RPCs. +constexpr float kUnsetRpcTimeout = -1; +constexpr auto kDefaultInitMethod = "env://"; +constexpr float kSecToMsConversion = 1000; +constexpr auto kRpcTimeoutErrorStr = + "RPC ran for more than set timeout ({} ms) and will now be marked with an error"; +constexpr auto kDefaultNumWorkerThreads = 16; + +using steady_clock_time_point = + std::chrono::time_point; +// Input is qualified name string, output is JIT StrongTypePtr +// Same as jit::TypeResolver, did not import jit::TypeResolver to here +// because it could introduce cyclic dependencies. +using TypeResolver = + std::function; + +struct TORCH_API RpcBackendOptions { + RpcBackendOptions() + : RpcBackendOptions(kDefaultRpcTimeoutSeconds, kDefaultInitMethod) {} + + RpcBackendOptions(float rpcTimeoutSeconds, std::string initMethod) + : rpcTimeoutSeconds(rpcTimeoutSeconds), + initMethod(std::move(initMethod)) { + TORCH_CHECK(rpcTimeoutSeconds >= 0, "RPC Timeout must be non-negative"); + } + + float rpcTimeoutSeconds; + std::string initMethod; +}; + +// A globally unique ID to identify an RpcAgent +struct TORCH_API WorkerInfo : torch::CustomClassHolder { + WorkerInfo(std::string name, int64_t id); + + WorkerInfo(std::string name, worker_id_t id); + + bool operator==(const WorkerInfo& rhs) { + return (id_ == rhs.id_) && (name_ == rhs.name_); + } + + static constexpr size_t MAX_NAME_LEN = 128; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string name_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t id_; +}; + +struct TORCH_API RegisterWorkerInfoOnce { + RegisterWorkerInfoOnce(); +}; + +TORCH_API std::ostream& operator<<( + std::ostream& os, + const WorkerInfo& workerInfo); + +// Struct for options to configure the RPC Retry protocol. +struct TORCH_API RpcRetryOptions { + // Using a default constructor like all other Options structs in the RPC + // codebase. TORCH_CHECKs for input validation are done in the + // sendWithRetries function. + RpcRetryOptions() = default; + // Maximum number of times we will retry the RPC + int maxRetries{5}; + // Initial duration between consecutive RPC send attempts + std::chrono::milliseconds rpcRetryDuration{std::chrono::milliseconds(1000)}; + // Constant for exponential backoff used while calculating future wait + // durations + float retryBackoff{1.5}; +}; + +// Struct that stores all the metadata needed to retry a given RPC. +struct TORCH_API RpcRetryInfo { + RpcRetryInfo( + const WorkerInfo& to, + c10::intrusive_ptr message, + c10::intrusive_ptr originalFuture, + int retryCount, + RpcRetryOptions options) + : to_(to), + message_(std::move(message)), + originalFuture_(std::move(originalFuture)), + retryCount_(retryCount), + options_(options) {} + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const WorkerInfo& to_; + c10::intrusive_ptr message_; + // Future that is returned to the caller of sendWithRetries(). + c10::intrusive_ptr originalFuture_; + // Number of send attempts completed so far. + int retryCount_; + RpcRetryOptions options_; +}; + +// ``RpcAgent`` is the base class for sending and receiving RPC messages. It +// provides a unified ``send`` API for both request and response messages, and +// will invoke the given ``RequestCallback`` to process received requests. It +// should immediately become ready to serve request and accept response after +// construction. +class TORCH_API RpcAgent { + public: + // `WorkerInfo` is the globally unique identifier for this RpcAgent instance. + // It contains a ``name_`` field and an ``id_`` field. ``name_`` is the + // globally unique name for this ``RpcAgent``. It is up to the ``RpcAgent`` + // implementation to determine how to resolve names. ``id_`` is the globally + // unique ID for this ``RpcAgent``. This should be determined by the + // ``RpcAgent`` implementation. + // The ``RequestCallback`` will be invoked to handle received requests. This + // ``RpcAgent`` base class makes no assumption on the thread-safeness of the + // ``RequestCallback``. ``RpcAgent`` implementations need to make sure that + // its threading model conform to ``RequestCallback``'s requirement. + // NB: RpcAgent implementations should not start serving requests until + // ``start()`` is called, as there could be other contexts that have not been + // initialized yet at this time. + RpcAgent( + WorkerInfo id, + std::unique_ptr cb, + std::chrono::milliseconds rpcTimeout); + + virtual ~RpcAgent(); + + // Send a message to the ``RpcAgent`` of id ``to`` and returns a + // ``JitFuture`` ptr. The implementation must be asynchronous, i.e., it + // cannot block until it receives the response. + // + // If ``message.isRequest()`` is true, the ``JitFuture`` will be + // completed when the response arrives. For other message types, the Future + // should be ignored by the caller. + virtual c10::intrusive_ptr send( + const WorkerInfo& to, + c10::intrusive_ptr message, + const float rpcTimeoutSeconds = kUnsetRpcTimeout, + const DeviceMap& deviceMap = {}) = 0; + + // Retries sending the message up to maxRetries times until an ACK is + // received. The duration between consecutive sends is increased over + // time using an exponential backoff algorithm. + // + // Sends ``message`` to the ``RpcAgent`` of id ``to`` and returns a + // ``JitFuture`` ptr, just like send(). Caller can specify the maximum + // number of retries for this RPC (default is 5), initial duration between + // sends (default is 1000ms), and backoff constant (default is 1.5) by + // passing in the RpcRetryOptions struct. This API might end up + // executing a method twice on the remote end (it does not guarantee + // exactly-once semantics). Therefore, the user must ensure their requests + // are idempotent. + c10::intrusive_ptr sendWithRetries( + const WorkerInfo& to, + c10::intrusive_ptr message, + RpcRetryOptions retryOptions = RpcRetryOptions()); + + // Return a reference to the ``WorkerInfo`` of this RpcAgent. + // NB: not using ``std::optional`` here because we might + // need to create a separate RPC API lib and avoid forcing all ``RpcAgent`` + // implementations to depend on libtorch. + const WorkerInfo& getWorkerInfo() const; + + // Return a reference to the ``WorkerInfo`` of the given ``workerName``. + virtual const WorkerInfo& getWorkerInfo( + const std::string& workerName) const = 0; + + virtual const WorkerInfo& getWorkerInfo(worker_id_t id) const = 0; + + virtual std::vector getWorkerInfos() const = 0; + + // Retrieve the timeout for all RPCs. + inline std::chrono::milliseconds getRpcTimeout() const { + return rpcTimeout_.load(); + } + + // Set the timeout for all RPCs + inline void setRpcTimeout(const std::chrono::milliseconds& rpcTimeout) { + rpcTimeout_.store(rpcTimeout); + } + + // Call sync and join all internal threads. This method should be called + // before every RPC process exits. + virtual void join(bool shutdown = false, float timeout = 0) = 0; + + // Synchronize the this process with other ``RpcAgent`` processes. Block until + // all ``RpcAgent``s reach this method and send all pending messages. + virtual void sync() = 0; + + // Sets up backend-agnostic state for accepting requests. Currently, this + // entails setting rpcAgentRunning_ to true, creating the retry thread, and + // calling the backend's startImpl. + void start(); + + // Derived classes must override this function to start accepting requests. + // This is used to initialize any backend-specific state. Users must call + // start, not startImpl, to initialize the RPC Agent. + virtual void startImpl() = 0; + + // Stop accepting requests and shutdown the RPC framework as soon as possible + // by terminating all RPC threads. + void shutdown(); + + // Derived classes must override this function to start accepting requests. + // THis is used to clean up any backend-specific state. Users must call + // shutdown, not shutdownImpl, to shutdown the RPC Agent. + virtual void shutdownImpl() = 0; + + // Check if current RPC agent is set. + static bool isCurrentRpcAgentSet(); + + // Retrieve the valid current RPC agent. + static std::shared_ptr getCurrentRpcAgent(); + + // Set the current RPC agent. + static void setCurrentRpcAgent(std::shared_ptr rpcAgent); + + // Retrieve metrics as KV map + virtual std::unordered_map getMetrics() = 0; + + // Retrieve debug info in addition to metrics as KV map + virtual std::unordered_map getDebugInfo(); + + // Flag to control whether GIL wait times + // should be profiled or not. + void enableGILProfiling(bool flag); + + // Retrieve whether we should profile GIL wait times or not. + bool isGILProfilingEnabled(); + + // Set type resolver that will be passed to JIT pickler to resolver type Ptr + // based on type str. + void setTypeResolver(std::shared_ptr typeResolver); + + // Get the type resolver + std::shared_ptr getTypeResolver(); + + // Retrieves the device map for the provided destination worker. + virtual DeviceMap getDeviceMap(const WorkerInfo& dst) const; + + // Retrieve the (non-CPU) devices that are supported by the agent. + virtual const std::vector& getDevices() const; + + protected: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const WorkerInfo workerInfo_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::unique_ptr cb_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic rpcTimeout_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic profilingEnabled_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::shared_ptr typeResolver_; + // Atomic boolean indicating whether this agent is running. It controls + // whether several background threads should be running. It is set in + // RpcAgent::start() and unset in the derived class shutdown(). + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic rpcAgentRunning_; + + private: + static std::shared_ptr currentRpcAgent_; + // Add GIL wait time data point to metrics + virtual void addGilWaitTime(const std::chrono::microseconds gilWaitTime) = 0; + friend class PythonRpcHandler; + + // Map that stores metadata for RPC's that may need to be re-tried as well as + // the timepoint at which we should re-try them. + std::map< + steady_clock_time_point, + std::unordered_set>> + rpcRetryMap_; + + // Thread that checks for retryable RPC's in the rpcRetryMap_ and sleeps until + // the next unACKed RPC's timeout has expired. + std::thread rpcRetryThread_; + + // Function that rpcRetryThread_ calls in a loop as long as RpcAgent is + // running. + void retryExpiredRpcs(); + + // This is the callback attached to futures corresponding to send retries. + // This handles 3 cases: 1). send was completed, 2). send failed with an + // error and we've done maxRetries failed send attempts, and 3). send + // failed with an error and we have more retries to go. In case 1, we mark + // the original future as complete. In case 2, we mark the future with an + // error and do not retry again. In case 3, we move the RpcRetryInfo struct + // to another time point in the map to schedule the RPC for a future send. + void rpcRetryCallback( + JitFuture& message, + steady_clock_time_point newTime, + std::shared_ptr earliestRpc); + + // Function that uses the exponential backoff algorithm to compute the next + // time point to retry a given RPC. + inline steady_clock_time_point computeNewRpcRetryTime( + RpcRetryOptions& options, + int retryCount) { + // The exponential backoff algorithm being used here is: + // newTime = timeNow + (retryDuration * (backoffConstant ^ retryCount)). + std::chrono::milliseconds timedelta = + std::chrono::duration_cast( + options.rpcRetryDuration * pow(options.retryBackoff, retryCount)); + return std::chrono::time_point_cast( + std::chrono::steady_clock::now() + timedelta); + } + + // Condition Variable to signal when the rpcRetryMap_ has been populated. + std::condition_variable rpcRetryMapCV_; + + // Mutex to protect RpcRetryMap_. + std::mutex rpcRetryMutex_; +}; + +} // namespace torch::distributed::rpc + +namespace std { +template <> +struct hash { + std::size_t operator()( + const torch::distributed::rpc::WorkerInfo& worker_info) const noexcept { + return worker_info.id_; + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_command_base.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_command_base.h new file mode 100644 index 0000000000000000000000000000000000000000..2ea338813b121c913e1cd0d78fdf5dfe22c03bb1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_command_base.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// Base class for all RPC request and responses. +class RpcCommandBase { + public: + // Need to override this to serialize the RPC. This should destructively + // create a message for the RPC (Hence the &&). + c10::intrusive_ptr toMessage() && { + JitRRefPickleGuard jitPickleGuard; + return std::move(*this).toMessageImpl(); + } + virtual c10::intrusive_ptr toMessageImpl() && = 0; + virtual ~RpcCommandBase() = 0; +}; + +inline RpcCommandBase::~RpcCommandBase() = default; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_context.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_context.h new file mode 100644 index 0000000000000000000000000000000000000000..6f1703a51e6f67be61b2591e3728eda4da5d5566 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_context.h @@ -0,0 +1,340 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace torch::distributed::rpc { + +namespace callback { +// It's the callback for RemoteCall. +void TORCH_API +confirmPendingUser(const JitFuture& jitFuture, const ForkId& expectedForkId); + +// It's the callback for finishing creating owner rref, it returned deletedRRef, +// so that the deletedRRef can be handled under GIL in python_functions.cpp if +// deletedRRef contains python object. +c10::intrusive_ptr TORCH_API +finishCreatingOwnerRRef(const JitFuture& jitFuture, const RRefId& rrefId); +} // namespace callback + +// Manages RRef lifetime and keeps track of RRef forks. +class TORCH_API RRefContext { + public: + static RRefContext& getInstance(); + // NB: This method must be called before destructing RRefContext singleton. + // Similar to delForkOfOwner, this method returns a vector of OwnerRRefs that + // hold py::object. The call-site is also responsible for resetting those + // shared_ptr objects with a GIL. See comments at delForkOfOwner() for more + // details. + static std::vector> destroyInstance( + bool ignoreRRefLeak = true); + + static void handleException(const JitFuture& jitFuture); + + // handle exception without throw ::c10::Error again + static void handleExceptionSilent(const JitFuture& jitFuture); + + RRefContext(const RRefContext&) = delete; + RRefContext(RRefContext&& other) = delete; + void operator=(const RRefContext&) = delete; + RRefContext& operator=(RRefContext&& other) = delete; + + ~RRefContext(); + + // get the worker id of the current worker + inline worker_id_t getWorkerId() const { + return agent_->getWorkerInfo().id_; + } + + // get the worker name of the current worker + inline const std::string& getWorkerName() const { + return agent_->getWorkerInfo().name_; + } + + // generate a globally unique ID + inline GloballyUniqueId genGloballyUniqueId() { + return GloballyUniqueId(getWorkerId(), nextLocalId_++); + } + + inline const std::shared_ptr& agent() const { + return agent_; + } + + // create a ``UserRRef`` owned by the worker ``ownerId`` + c10::intrusive_ptr createUserRRef( + worker_id_t ownerId, + const TypePtr& type); + + // Convert an RRefForkData into an RRef. This RRef could be user or owner. + // This RRef could have already existed before, or could be created in this + // method, we pass type here to validate or help the rref creation. + c10::intrusive_ptr getOrCreateRRef( + const RRefForkData& rfd, + const TypePtr& type); + + // Get the ``OwnerRRef`` of id ``rrefId``. If it does not exist, create a new + // one. This function is called in two places: + // 1. when processing ``rpc.remote()``, i.e., ``SCRIPT_REMOTE_CALL`` + // ``PYTHON_REMOTE_CALL``. + // 2. when unpickling ``OwnerRRef``. + // What's common in these two cases are, 1) the RRefId is already generated + // 2) the TypePtr is presented. So it can always create the ``OwnerRRef`` if + // it is not yet available. + c10::intrusive_ptr getOrCreateOwnerRRef( + const RRefId& rrefId, + const TypePtr& type); + + // Create an empty owner rref of type. + // This method is called to first time generate an ``OwnerRRef``, e.g., + // 1) ``rpc.RRef(obj)`` + // 2) create the ``OwnerRRef`` on `rpc.remote()` caller side. + // What's common in these two cases are, 1) the RRefId hasn't been generated + // 2) the TypePtr is presented. + c10::intrusive_ptr createOwnerRRef(const TypePtr& type); + + // Returns a Future of the OwnerRRef, which will be marked completed when + // ``OwnerRRef`` is created. This method is used when the TypePtr is not + // available, e.g., when processing to_here(). The forceCreated flag can be + // used to ensure that the rref is created on the owner, otherwise throw in + // cases where the user of this API expects this to return a completed future. + // Note that the return value is a intrusive_ptr to a c10::ivalue::Future that + // holds the RRef. + c10::intrusive_ptr getOwnerRRef( + const RRefId& rrefId, + bool forceCreated = false); + + // Adding the RRefId of an OwnerRRef into the forks_ map. This is useful when + // making a remote call to self, which as for now, still goes through serde + // and invokes request callback. In this case, the OwnerRRef has already been + // created on the send side, and we need to pass it to the receive side, + // instead of creating a new OwnerRRef. This is done by adding the OwnerRRef + // into owners_. However, that alone is not enough, as it could be deleted + // when all UserRRef die, which would then remove the OwnerRRef from owners_ + // and this could happen before the self remote call finishes. To prevent + // that, this API adds the RRefId as a ForkId, which will then delete the + // ForkId when the self remote is done. + void addSelfAsFork(c10::intrusive_ptr& rref); + + // Register a fork of the ``OwnerRRef``, and inserts a intrusive_ptr of the + // ``OwnerRRef`` in a map to keep it alive. + void addForkOfOwner(const RRefId& rrefId, const ForkId& forkId); + // Performs the same function as addForkOfOwner but ignores duplicate + // requests. This idempotent function is used with RREF_FORK_REQUEST calls, + // whereas all other message types use the non-idempotent variant. + void addForkOfOwnerIfNotPresent(const RRefId& rrefId, const ForkId& forkId); + // Delete a fork of the ``OwnerRRef``. NB: this could trigger deletion on the + // IValue or py::object. For the later, this method will acquire GIL. + // NB: If this fork deletion triggered deleting OwnerRRef, this method will + // return a shared_ptr to the OwnerRRef, which is likely to be the last + // shared_ptr instance for it. Therefore, deleting this shared_ptr + // will also trigger deleting the object it points to. If OwnerRRef holds a + // py::object, deleting it require GIL. The call site should guarded it with + // a GIL and reset the shared_ptr. The GIL-guarded deletion is intentionally + // left out of this function to avoid creating dependency on pybind. + c10::intrusive_ptr delForkOfOwner( + const RRefId& rrefId, + const ForkId& forkId); + + // Invoked when pickling an RRef to setup child/fork properly + RRefForkData prepareChildFork(const c10::intrusive_ptr& rref); + // Invoked when unpickling an RRef to send RREF_FORK_REQUEST to owner and + // send RREF_CHILD_ACCEPT to the parent. + // NB: forkId is necessary here as the rref could be an OwnerRRef + void notifyOwnerAndParentOfFork( + const ForkId& forkId, + worker_id_t parent, + const c10::intrusive_ptr& rref); + + // When a UserRRef is forked to another worker (user or owner), it is added + // into pendingChildren_ to be held alive until it receives RREF_CHILD_ACCEPT + // from the child. + // NB: This is necessary for both user and owner child. As we do not have FIFO + // communication between workers, we need this strategy to make sure that all + // previously submitted rpc/remote calls are acked before sending out the + // RREF_USER_DELETE message. Otherwise, the OwnerRRef could be deleted too + // soon. + void addPendingChild( + const ForkId& forkId, + const c10::intrusive_ptr& rref); + void delPendingChild(const ForkId& forkId); + + // When a UserRRef is created, it is added into pendingUsers_ to be held alive + // until it receives RREF_USER_ACCEPT from the owner. + void addPendingUser( + const ForkId& forkId, + const c10::intrusive_ptr& rref); + void delPendingUser(const ForkId& forkId); + void addConfirmedUser( + const ForkId& forkId, + const c10::intrusive_ptr& rref); + + // Retrieve a pending user given the fork ID. Throws if the user has already + // been confirmed (i.e. is no longer in the pendingUsers_ map). + c10::intrusive_ptr getPendingUser(const ForkId& forkId); + + // Start recording new pending UserRRefs. All pending UserRRefs introduced + // after this point will be put into the thread_local userTable_, which will + // then be consumed and cleared in waitForThreadLocalPendingRRefs(). + void recordThreadLocalPendingRRefs(); + // End recording new pending UserRRefs, and clear the thread_local userTable_. + // Returns a Future which will be marked as completed when all pending + // UserRRefs in the current userTable_ are confirmed by their owners. The bool + // value in the Future is unused. + // This method is useful to make sure RRefs in user function arguments are + // confirmed before launching user code. + // NB: Callers of this method does not need to keep the returned Future alive, + // because this Future is already captured in callbacks of the + // PendingUserState. If there is no pending UserRRefs, this method returns a + // completed future. + c10::intrusive_ptr waitForThreadLocalPendingRRefs(); + // Only call this function when there are errors during a recording session, + // and it is likely that waitForThreadLocalPendingRRefs() cannot be invoked + // properly. + // TODO: make this a context guard + void clearRecordedPendingRRefsOnError(); + + void delUser( + const worker_id_t owner, + const RRefId& rrefId, + const ForkId& forkId); + void delAllUsersAndUnforkedOwners(std::chrono::milliseconds timeoutMillis); + + std::unordered_map getDebugInfo(); + + private: + struct PendingUserState { + PendingUserState(c10::intrusive_ptr rref) + : rref_(std::move(rref)), + confirmationFuture_(c10::make_intrusive(BoolType::get())) { + } + + inline void confirm() { + c10::static_intrusive_pointer_cast(rref_)->confirm(); + confirmationFuture_->markCompleted(); + } + + c10::intrusive_ptr rref_; + // Use Future.wait() and Future.markCompleted() to block and unblock user + // functions. The bool value wrapped by the future_ is not used. + c10::intrusive_ptr confirmationFuture_; + }; + + RRefContext(std::shared_ptr /*agent*/); + + c10::intrusive_ptr createUserRRef( + worker_id_t ownerId, + const RRefId& rrefId, + const ForkId& forkId, + const TypePtr& type); + + void finishForkRequest(const ForkId& forkId, worker_id_t parent); + + // If there is any leak on any RRef, this method will throw an error. + void checkRRefLeaks(bool ignoreRRefLeak); + + static std::atomic nextLocalId_; + + const std::shared_ptr agent_; + mutable std::mutex mutex_; + // Keep OwnerRRefs alive until there is no living UserRRefs. + std::unordered_map, RRefId::Hash> owners_; + // A map to track OwnerRRefs that are requested but not yet created. This can + // happen if the to_here() message is processed on the owner before the + // corresponding creator rpc.remote() message. If this happens, instead of + // to_here() RPC thread to block waiting for the OwnerRRef creation, the + // RRefContext returns a Future, so that the RPC request processing logic can + // attach subsequent code as a callback to that Future. + // NB: the OwnerRRefs in this map must be cleared when the corresponding + // OwnerRRef is created. Note that the values in this map are intrusive_ptrs + // to c10::ivalue::Future that will be marked completed with the owner RRef. + std::unordered_map, RRefId::Hash> + pendingOwners_; + // Tracks known living UserRRefs of an OwnerRRef + std::unordered_map< + RRefId, + std::unordered_set, + RRefId::Hash> + forks_; + + // This cond var is used by deleteAllUsers(), a event notification is sent if + // number of pending UserRRef or UserRRef children is reduced, or + // number of owned OwnerRRef is reduced. + std::condition_variable deleteAllUsersCV_; + // The follow 3 maps keep UserRRefs alive by holding a intrusive_ptr to the + // RRef instances. A UserRRef must be added into this map if any of the + // following two conditions is true: + // + // (1) A UserRRef has not been accepted by owner yet. + // + // It can be used or shared, but cannot be deleted, and hence kept alive + // in this map. A message of type RREF_USER_ACCEPT will move the + // corresponding RRef from pendingUsers_ map to confirmedUsers_ map. + std::unordered_map, ForkId::Hash> + pendingUsers_; + // UserRRefs are added into this map when it is confirmed by the owner. + // When destroying RRefContext this map helps to find local UserRRefs + // and send delete messages if they are still not deleted by Python + // garbage collection. + std::unordered_map, ForkId::Hash> + confirmedUsers_; + + // (2) A UserRRef has forked a child UserRRef which has not been accepted by + // the owner yet. + // + // In this case, this UserRRef cannot send out RREF_USER_DELETE message, + // as it could potentially trigger the OwnerRRef been deleted before the + // owner learns about the forked child. + std::unordered_map, ForkId::Hash> + pendingChildren_; + + // The RRef context performs its operations through async RPC requests, in + // order to not block the user code. Therefore the RRef context's state may be + // lagging a bit behind what it is intended to be, while it waits for these + // requests to complete. To allow syncing when needed, we store the count of + // these pending requests, so that users can wait for it to reach zero. + std::atomic numPendingFutures_{0}; + + std::mutex destroyedMutex_; + bool destroyed_{false}; + + // Thread local states to keep UserRRefs deserialized from user function + // arguments. + static thread_local std::vector> userTable_; + // A flag indicating whether subsequently created UserRRefs should be added to + // the thread_local userTable_. The flag is set to true before serializing + // RPC arguments and then set to false before running the corresponding + // user code. See addPendingUser and delPendingUser for more details. + // NB: The reason for having this flag is because addPendingUser are called in + // two cases, and we only want to track the 2nd case. + // (1) RRef as the return value: when calling rpc.remote, the UserRRef on the + // caller side is added to the context using addPendingUser. + // (2) RRef as an argument: When running an RPC using RRefs as arguments, the + // RRef is forwarded to the callee as new UserRRefs (if the callee is not + // the owner). In this case, we block running the user function until all + // UserRRefs are confirmed by the owner. + // This contract guarantees that no UserRRefs can be used remotely without + // confirmation. Note that, however, the UserRRef created by rpc.remote can + // still be passed to local functions as arguments and used there. This is by + // design, because this feature is especially useful when, say a master node + // creates multiple UserRRefs in a loop and then shares them with other nodes. + // Blocking every iteration in the loop until RRefs are confirmed will slow + // this down. This nuance on UserRRef can be interpreted as we only make + // exceptions for UserRRef creators. And using the UserRRef on its creator + // without confirmation is OK, because the creator would either call to_here + // or forward the UserRRef, and both would then require confirmations from the + // owner. + static thread_local bool recording_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_impl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..ae01733c5d9dab82ad33d47164120d1037ecb3f8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_impl.h @@ -0,0 +1,426 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::distributed::rpc { + +class RRef; +class RRefContext; +class UserRRef; + +constexpr int OWNER_IDX = 0; // index of ownerId in the tuple +constexpr int RREFID_ON_IDX = 1; // index of RRefId.createdOn_ in the tuple +constexpr int RREFID_ID_IDX = 2; // index of RRefId.localId_ in the tuple +constexpr int FORKID_ON_IDX = 3; // index of ForkId.createdOn_ in the tuple +constexpr int FORKID_ID_IDX = 4; // index of ForkId.localId_ in the tuple +constexpr int PARENT_IDX = 5; // index of parent in the tuple +constexpr int TYPE_IDX = 6; // index of parent in the tuple + +// NB: if more fields are added, make sure this field is also bumped +constexpr int RFD_TUPLE_SIZE = 7; // number of RRefForkData fields in py::tuple + +// Represents fork of an RRef to be sent over the wire. +struct TORCH_API RRefForkData { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t ownerId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ForkId forkId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t parent_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string typeStr_; + + RRefForkData( + worker_id_t ownerId, + const RRefId& rrefId, + const ForkId& forkId, + worker_id_t parent, + std::string typeStr); +}; + +// Note [RRef Protocol] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~ +// +// [Background] +// +// RRef stands for Remote REFerence. Each RRef is owned by a single worker +// (i.e., owner) and can be used by multiple users. The owner stores the real +// data referenced by its RRefs. RRef needs to support fast and scalable RPC. +// Hence, in the design, we avoid using a single global master to keep RRef +// states, instead owners will keep track of the global reference counts +// for its RRefs. Every RRef can be uniquely identified by a global RRefId, +// which is assigned at the time it is first created either on a user or on the +// owner. +// +// On the owner worker, there is only one OwnerRRef instance, which contains the +// real data, while on user workers, there can be as many UserRRefs as +// necessary, and UserRRef does not hold the data. All usage on the OwnerRRef +// should retrieve the unique OwnerRRef instance using the globally unique +// RRefId. //A UserRRef will be created when it is used as an argument or return +// value in dist.rpc or dist.remote call, but RRef forking and reference +// counting (RC) are completely transparent to applications. Every UserRRef will +// also have its globally unique ForkId. +// +// [Assumptions] +// +// 1. Transient Network Failures +// +// TODO: current RRef implementation does not tolerate failures +// +// The RRef design handles transient network failures by retrying +// messages. Node crashes or permanent network partition is beyond the scope. +// When those incidents occur, the application may take down all workers, revert +// to the previous checkpoint, and resume training. +// +// 2. Non-idempotent UDFs +// +// We assume UDFs are not idempotent and therefore cannot be retried. However, +// internal RRef control messages are idempotent and retried upon message +// failure. +// +// TODO: RRef internal messages are not yet idempotent +// +// 3. Out of Order Message Delivery +// +// We do not assume message delivery order between any pair of nodes, because +// both sender and receiver are using multiple threads. There is no guarantee on +// which message will be processed first. +// +// [RRef Lifetime] +// +// The goal of the protocol is to delete an OwnerRRef at an appropriate time. +// The right time to delete an OwnerRRef is when there are no living UserRRefs +// and Python GC also agrees to delete the OwnerRRef instance on the owner. The +// tricky part is to determine if there are any living UserRRefs. +// +// A user can get a UserRRef in three situations: +// +// (1). Receiving a UserRRef from the owner. +// (2). Receiving a UserRRef from another user. +// (3). Creating a new UserRRef owned by another worker. +// +// (1) is the simplest case where the owner initiates the fork, and hence it can +// easily increment local RC. The only requirement is that any UserRRef must +// notify the owner before destruction. Hence, we need the first guarantee: +// +// G1. The owner will be notified when any UserRRef is deleted. +// +// As messages might come delayed or out-of-order, we need more one guarantee to +// make sure the delete message is not sent out too soon. Let us first introduce +// a new concept. If A sends an RPC to B that involves an RRef, we call the RRef +// on A the parent RRef and the RRef on B the child RRef. +// +// G2. Parent RRef cannot be deleted until the child RRef is confirmed by the +// owner. +// +// Under (1), where the caller is UserRRef and callee is OwnerRRef, it simply +// means that the user will not send out the delete message until all previous +// messages are ACKed. Note that ACKed does not mean the owner finishes +// executing the function, instead, it only means the owner has retrieved its +// local OwnerRRef and about to pass it to the function, which is sufficient to +// keep the OwnerRRef alive even if the delete message from the user arrives at +// the owner before the function finishes execution. +// +// With (2) and (3), it is possible that the owner only partially knows the RRef +// fork graph or not even knowing it at all. For example, the RRef could be +// constructed on a user, and before the owner receives the RPC call, the +// creator user might have already shared the RRef with other users, and those +// users could further share the RRef. One invariant is that the fork graph of +// any RRef is always a tree rooted at the owner, because forking an RRef always +// creates a new RRef instance, and hence every RRef has a single parent. One +// nasty detail is that when an RRef is created on a user, technically the owner +// is not its parent but we still consider it that way and it does not break the +// argument below. +// +// The owner's view on any node (fork) in the tree has three stages: +// +// 1) unknown -> 2) known -> 3) deleted. +// +// The owner's view on the entire tree keeps changing. The owner deletes its +// OwnerRRef instance when it thinks there are no living UserRRefs, i.e., when +// OwnerRRef is deleted, all UserRRefs could be either indeed deleted or +// unknown. The dangerous case is when some forks are unknown and others are +// deleted. +// +// G2 trivially guarantees that no parent UserRRef Y can be deleted before the +// owner knows all of Y's children UserRRefs. +// +// However, it is possible that the child UserRRef Z may be deleted before the +// owner knows its parent Y. More specifically, this can happen when all of Z's +// messages are processed by the owner before all messages from Y, including the +// delete message. Nevertheless, this does not cause any problem. Because, at +// least one of Y's ancestor will be alive, and it will prevent the owner from +// deleting the OwnerRRef. Consider the following example: (NB: this scenario +// will no longer relevant when we block UDF until all RRefs are confirmed by +// the owner) +// +// OwnerRRef -> A -> Y -> Z +// +// OwnerRRef forks to A, then A forks to Y, and Y forks to Z. Z can be deleted +// without OwnerRRef knowing Y. However, the OwnerRRef will at least know A, as +// the owner directly forks the RRef to A. A won't die before the owner knows Y. +// +// Things get a little trickier if the RRef is created on a user: +// +// OwnerRRef +// ^ +// | +// A -> Y -> Z +// +// If Z calls to_here on the UserRRef, the owner at least knows A when Z is +// deleted, because otherwise to_here wouldn't finish. If Z does not call +// to_here, it is possible that the owner receives all messages from Z before +// any message from A and Y. In this case, as the real data of the OwnerRRef has +// not been created yet, there is nothing to be deleted either. It is the same +// as Z does not exist at all Hence, it's still OK. +// +// See #26759 for more details and discussions. +// +// TODO: make RRef an IValue, and edit createStackForSchema accordingly +// TODO: make RRef system messages idempotent and retry on failures. +// +// ``RRef`` is the base type for both ``UserRRef`` and ``OwnerRRef``. +// Each ``RRef`` has a globally unique ``RRefId``. +class TORCH_API RRef : public RRefInterface { + public: + // RRef is made NOT copyable NOT movable to prevent messing up reference + // counting. + explicit RRef(const RRef& other) = delete; + explicit RRef(RRef&& other) = delete; + RRef& operator=(RRef&& other) = delete; + + ~RRef() override = default; + + // returns the worker id of the owner + inline worker_id_t owner() const override { + return ownerId_; + } + + // returns the worker name of the owner + inline std::string ownerName() const override { + return RpcAgent::getCurrentRpcAgent()->getWorkerInfo(ownerId_).name_; + } + + // returns the worker info of the owner + inline WorkerInfo ownerWorkerInfo() const { + return RpcAgent::getCurrentRpcAgent()->getWorkerInfo(ownerId_); + } + + // Returns the globally unique RRefId of this RRef + inline const RRefId& rrefId() const { + return rrefId_; + } + + inline bool isPyObj() const { + return type_ == PyObjectType::get(); + } + inline const TypePtr type() const override { + return type_; + } + + // Save the future corresponding to the creation of this RRef on a remote + // node. Note that this is only set when processing requests invoked with + // rpc.remote. This is only used to get the future corresponding to the rref + // for profiling use cases. + inline void registerOwnerCreationFuture(c10::intrusive_ptr fut) { + ownerCreationFuture_ = std::move(fut); + } + + // Get the future corresponding to the creation of this rref. + inline c10::intrusive_ptr getOwnerCreationFuture() const { + return ownerCreationFuture_; + } + + // Check if creation of this RRef on owner node has timed out. + inline bool getTimedOut() const { + return timedOut_.load(); + } + + // Dispatches an error to the correct handler based on its RPCErrorType. + void handleError(RPCErrorType errorType, const JitFuture& JitFuture); + + // Send delete UserRRef request to Owner, + // if the request hasn't been sent yet. + // There are 2 cases to call it, + // 1, Python GC decides end of UserRRef lifetime, calling destructor. + // 2, RPC module graceful shutdown calls it on all UserRRefs tracked + // in the RRefContext. + virtual void tryDel() {} + + protected: + // Indicates that the creation of this RRef on owner node has timed out. + inline void setTimedOut() { + timedOut_ = true; + } + friend class RRefContext; + + RRef(worker_id_t ownerId, const RRefId& rrefId, TypePtr type); + + virtual RRefForkData fork() const; + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const worker_id_t ownerId_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic timedOut_{false}; + + // type field to denote the type of the element that the RRef is holding + // it could be any TypePtr that JIT support, including PyObjectType + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const TypePtr type_; + // Future corresponding to request to create RRef on remote node. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + c10::intrusive_ptr ownerCreationFuture_; +}; + +// ``UserRRef`` represents a user of an RRef. Besides the ``RRefId``, each user +// also has a globally unique ``ForkId`` to identify this user. ``UserRRef`` +// never owns the real value, the only way to get the value of the ``RRef`` is +// to call ``to_here()`` and get a copy.. +class TORCH_API UserRRef final : public RRef { + public: + UserRRef(const UserRRef& other) = delete; + UserRRef(UserRRef&& other) = delete; + UserRRef& operator=(const UserRRef& other) = delete; + UserRRef& operator=(UserRRef&& other) = delete; + + UserRRef( + worker_id_t ownerId, + const RRefId& rrefId, + const ForkId& forkId, + TypePtr type); + + inline bool isOwner() const override { + return false; + } + + inline bool confirmedByOwner() const override { + return confirmedByOwner_; + } + + // Returns the globally unique ForkId of this RRef + const ForkId& forkId() const; + + // Get of copy of the value from the ``OwnerRRef``. If the value is not ready + // yet, this call will block. + IValue toHere( + const float timeoutSeconds = + torch::distributed::rpc::kUnsetRpcTimeout) const; + + void tryDel() override; + + // Will be called when refcount reaches 0. + // Upon destruction, this ``UserRRef`` will tell the owner to deref. + void release_resources() override; + + // Will be called when both refcount and weakcount reach 0. See + // https://github.com/pytorch/pytorch/blob/9116f02bebf3a5260feef5732d36c54ecb3b4033/c10/util/intrusive_ptr.h#L204 + // This is called on destructing the wrapping intrusive_ptr_target instance + // and it's data members. + ~UserRRef() override; + + private: + friend class RRefContext; + + RRefForkData fork() const override; + inline void confirm() { + confirmedByOwner_ = true; + } + + const ForkId forkId_; + + // Indicates if this user has sent delete message to it's owner. + // Note, thread safety is needed because delete message could be sent by + // either the destructor called by Python garbage collection or RRefContext + // proactive cleanup on RPC graceful shutdown. + std::mutex deletedOnOwnerMutex_; + bool deletedOnOwner_{false}; + // Indicating whether this UserRRef has been confirmed by its owner. + std::atomic confirmedByOwner_; +}; + +// Keep the template only on the derived class because ``RRefContext`` needs to +// erase the type on ``RRef`` and keep them in one map. +class TORCH_API OwnerRRef final : public RRef { + public: + OwnerRRef(const OwnerRRef& other) = delete; + OwnerRRef(OwnerRRef&& other) = delete; + OwnerRRef& operator=(const OwnerRRef& other) = delete; + OwnerRRef& operator=(OwnerRRef&& other) = delete; + + OwnerRRef( + worker_id_t ownerId, + const RRefId& rrefId, + TypePtr type, + std::vector devices); + + OwnerRRef( + worker_id_t ownerId, + const RRefId& rrefId, + TypePtr type, + std::optional value, + std::vector devices); + + inline bool isOwner() const override { + return true; + } + + // OwnerRRef is always confirmed, while UserRRef is only confirmed when the + // owner knows about it. + inline bool confirmedByOwner() const override { + return true; + } + + // Get a constant reference of the real value. This method will block if the + // value is not ready. This method does not need GIL as it does not create + // any new py::object. It will throw if there is an error. + const IValue& getValue() const; + + // Set the value of this ``OwnerRRef``. This method does not need GIL as it + // does not create any new py::object. + void setValue(IValue&& value); + // Sets the value of this ``OwnerRRef`` to contain an exception. + void setError(std::exception_ptr eptr); + + // Has a value or error been set? + bool hasValue() const; + // Gets a future that is satisfied when the value or error is set. + c10::intrusive_ptr getFuture(); + + private: + friend class RRefContext; + + c10::intrusive_ptr future_; +}; + +TORCH_API std::ostream& operator<<(std::ostream& os, const RRef& rref); + +// Helper function that casts from c10::RRefInterface to OwnerRRef +inline TORCH_API c10::intrusive_ptr fromRRefInterface( + const c10::intrusive_ptr& rrefInterface) { + return c10::static_intrusive_pointer_cast(rrefInterface); +} + +// Helper function that casts from OwnerRRef to c10::RRefInterface +inline TORCH_API c10::intrusive_ptr fromOwnerRRef( + const c10::intrusive_ptr& ownerRRef) { + return c10::static_intrusive_pointer_cast(ownerRRef); +} + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_proto.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_proto.h new file mode 100644 index 0000000000000000000000000000000000000000..b96713516e679436aee835898be9d554283e0a3e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_proto.h @@ -0,0 +1,168 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Temporary solution of RRef operations. +// TODO: Remove all these messages and use rpc + registered functions instead. +class TORCH_API RRefMessageBase : public RpcCommandBase { + public: + RRefMessageBase(const RRefId& rrefId, MessageType type) + : rrefId_(rrefId), type_(type) {} + + const RRefId& rrefId(); + + protected: + // NOLINTNEXTLINE(cppcoreguidelines*) + const RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines*) + const MessageType type_; +}; + +class TORCH_API ForkMessageBase : public RRefMessageBase { + public: + ForkMessageBase(const RRefId& rrefId, const ForkId& forkId, MessageType type) + : RRefMessageBase(rrefId, type), forkId_(forkId) {} + + const ForkId& forkId(); + + c10::intrusive_ptr toMessageImpl() && override; + static std::pair fromMessage( + const Message& message, + MessageType type); + + protected: + // NOLINTNEXTLINE(cppcoreguidelines*) + const ForkId forkId_; +}; + +// UserRRef uses this message to fetch the remote RRef value from the owner. +class TORCH_API ScriptRRefFetchCall final : public RRefMessageBase { + public: + ScriptRRefFetchCall(worker_id_t fromWorkerId, const RRefId& rrefId) + : RRefMessageBase(rrefId, MessageType::SCRIPT_RREF_FETCH_CALL), + fromWorkerId_(fromWorkerId) {} + + inline worker_id_t fromWorkerId() const { + return fromWorkerId_; + } + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t fromWorkerId_; +}; + +class TORCH_API PythonRRefFetchCall final : public RRefMessageBase { + public: + PythonRRefFetchCall(worker_id_t fromWorkerId, const RRefId& rrefId) + : RRefMessageBase(rrefId, MessageType::PYTHON_RREF_FETCH_CALL), + fromWorkerId_(fromWorkerId) {} + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t fromWorkerId_; +}; + +// OwnerRRef uses this message to send the RRef value to a remote UserRRef +class TORCH_API RRefFetchRet : public RpcCommandBase { + public: + RRefFetchRet(std::vector values, MessageType type) + : values_(std::move(values)), type_(type) {} + + const std::vector& values(); + c10::intrusive_ptr toMessageImpl() && override; + + private: + std::vector values_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const MessageType type_; +}; + +class TORCH_API ScriptRRefFetchRet final : public RRefFetchRet { + public: + explicit ScriptRRefFetchRet(std::vector values) + : RRefFetchRet(std::move(values), MessageType::SCRIPT_RREF_FETCH_RET) {} + + static std::unique_ptr fromMessage( + const Message& message); +}; + +class TORCH_API PythonRRefFetchRet final : public RRefFetchRet { + public: + explicit PythonRRefFetchRet(std::vector values) + : RRefFetchRet(std::move(values), MessageType::PYTHON_RREF_FETCH_RET) {} + + static std::unique_ptr fromMessage( + const Message& message); +}; + +// UserRRef (regardless it's the creator or not) uses this message to notify +// OwnerRRef on delete. +class TORCH_API RRefUserDelete final : public ForkMessageBase { + public: + RRefUserDelete(const RRefId& rrefId, const ForkId& forkId) + : ForkMessageBase(rrefId, forkId, MessageType::RREF_USER_DELETE) {} + + static std::unique_ptr fromMessage(const Message& message); +}; + +class TORCH_API RemoteRet final : public ForkMessageBase { + public: + RemoteRet(const RRefId& rrefId, const ForkId& forkId) + : ForkMessageBase(rrefId, forkId, MessageType::REMOTE_RET) {} + + static std::unique_ptr fromMessage(const Message& message); +}; + +// A child RRef uses this message to notify its parent that the child has been +// confirmed by the owner. +class TORCH_API RRefChildAccept final : public RpcCommandBase { + public: + explicit RRefChildAccept(const ForkId& forkId) : forkId_(forkId) {} + + const ForkId& forkId() const; + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ForkId forkId_; +}; + +// A child RRef uses this message to send a fork request to the owner. +class TORCH_API RRefForkRequest final : public ForkMessageBase { + public: + RRefForkRequest(const RRefId& rrefId, const ForkId& forkId) + : ForkMessageBase(rrefId, forkId, MessageType::RREF_FORK_REQUEST) {} + + static std::unique_ptr fromMessage(const Message& message); +}; + +class TORCH_API RRefAck final : public RpcCommandBase { + public: + RRefAck() = default; + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_call.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_call.h new file mode 100644 index 0000000000000000000000000000000000000000..b4073693ec762921a3816b558a8c76913b940357 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_call.h @@ -0,0 +1,72 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +using torch::jit::Operator; + +// A ScriptCall instance represents an invocation of a builtin operator for a +// TorchScript function. If it is a builtin operator, it +// contains a shared ptr to the `Operator` and a list of arguments. +// If it is a TorchScript function, it contains a non empty qualifiedName string +// to the TorchScript function schema name and a list of arguments. +class TORCH_API ScriptCall : public RpcCommandBase { + public: + // Constructor for builtin operator call. + ScriptCall(std::shared_ptr op, std::vector&& stack); + // Constructor for TorchScript function call. + ScriptCall( + const c10::QualifiedName& qualifiedName, + std::vector&& stack, + const bool isAsyncExecution = false); + + bool hasOp() const; + std::shared_ptr op() const; + bool hasQualifiedName() const; + const c10::QualifiedName& qualifiedName() const; + // return the argument stack of this builtin operator + const std::vector& stack() const; + std::vector& stackRef(); + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + ~ScriptCall() override = default; + + protected: + virtual void toIValues(std::vector& ivalues) const; + static std::unique_ptr fromIValues( + std::vector& ivalues); + + private: + // Given an operator symbol and a string schema, return the matched operator. + static std::shared_ptr matchOperator(const std::string& str_schema); + + static const std::string BUILTIN_OP_NAMESPACE_; + static const std::string ATEN_PREFIX_; + + // This field has value if this ScriptCall represents invocation of a builtin + // operator. + std::optional> op_; + // This field has non empty string if this ScriptCall represents invocation of + // an annotated torchscript function defined by users. + std::optional qualifiedName_; + std::vector stack_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_remote_call.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_remote_call.h new file mode 100644 index 0000000000000000000000000000000000000000..6ae72a328d457a150ae78f30c8c4c1d18c4b2664 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_remote_call.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +using torch::jit::Operator; + +// A ScriptRemoteCall instance represents an invocation of `dist.remote` on a +// builtin operator. Currently, it does not support using RRef as arguments yet. +// Besides the operator and a vector of arguments, ScriptRemoteCall also +// contains the RRefId and the ForkId of the return value RRef. +class TORCH_API ScriptRemoteCall final : public ScriptCall { + public: + // Constructor for builtin operator call. + ScriptRemoteCall( + std::shared_ptr op, + std::vector&& stack, + const RRefId& retRRefId, + const ForkId& retForkId); + + // Constructor for TorchScript function call. + ScriptRemoteCall( + const c10::QualifiedName& qualifiedName, + std::vector&& stack, + const RRefId& retRRefId, + const ForkId& retForkId, + const bool isAsyncExecution); + + inline const RRefId& retRRefId() const { + return retRRefId_; + } + + inline const ForkId& retForkId() const { + return retForkId_; + } + + static std::unique_ptr fromIValues( + std::vector& ivalues); + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const RRefId retRRefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ForkId retForkId_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_resp.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..e4fb8e7ca92d1389f1501908ad7062b0a223a204 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_resp.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// Return value of a builtin operator or a TorchScript function. +class TORCH_API ScriptResp final : public RpcCommandBase { + public: + explicit ScriptResp(at::IValue&& values); + + const at::IValue& value(); + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::IValue value_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_agent.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..b5f3a788d0cb86657effd94171062c84f0efc0e9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_agent.h @@ -0,0 +1,498 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_TENSORPIPE + +#include +#include + +#include +#include +#include +#include +#include + +// Forward-declare the TensorPipe classes we need, to avoid including its +// headers in PyTorch's ones and thus have it become a public dependency. + +namespace tensorpipe { + +class Context; +class Error; +class Listener; +class Message; +class Pipe; + +namespace transport { +class Context; +} // namespace transport + +namespace channel { +class Context; +} // namespace channel + +} // namespace tensorpipe + +namespace torch::distributed::rpc { + +// These priorities instruct TensorPipe on which transport/channel to pick +// during handshake. Higher priorities will take precedence over lower ones. +// The transport with lowest priority will be the one used to bootstrap pipes. + +constexpr int64_t kShmTransportPriority = 200; +constexpr int64_t kIbvTransportPriority = 100; +// The UV transport just uses TCP and should work everywhere, thus keep it last. +constexpr int64_t kUvTransportPriority = 0; + +constexpr int64_t kCmaChannelPriority = 1200; +constexpr int64_t kMultiplexedUvChannelPriority = 1100; +// The basic channel reuses a transport as a channel, and is thus our fallback. +constexpr int64_t kBasicChannelPriority = 1000; + +// CPU channel have higher priority than CUDA channels, since the latter might +// handle CPU-to-CPU transfers, but will always be less efficient than their +// CPU-only counterparts. +constexpr int64_t kCudaIpcChannelPriority = 300; +constexpr int64_t kCudaGdrChannelPriority = 200; +constexpr int64_t kCudaXthChannelPriority = 400; +constexpr int64_t kCudaBasicChannelPriority = 0; + +using steady_clock_time_point = + std::chrono::time_point; + +struct TORCH_API TransportRegistration { + std::shared_ptr transport; + int64_t priority; + std::string address; +}; + +TORCH_DECLARE_REGISTRY(TensorPipeTransportRegistry, TransportRegistration); + +struct TORCH_API ChannelRegistration { + std::shared_ptr channel; + int64_t priority; +}; + +TORCH_DECLARE_REGISTRY(TensorPipeChannelRegistry, ChannelRegistration); + +struct TORCH_API TensorPipeRpcBackendOptions : public RpcBackendOptions { + TensorPipeRpcBackendOptions( + int numWorkerThreads, + std::optional> transports, + std::optional> channels, + float rpc_timeout, + std::string init_method, + std::unordered_map device_maps = {}, + std::vector devices = {}) + : RpcBackendOptions(rpc_timeout, std::move(init_method)), + numWorkerThreads(numWorkerThreads), + transports(std::move(transports)), + channels(std::move(channels)), + deviceMaps(std::move(device_maps)), + devices(std::move(devices)) { + TORCH_CHECK( + numWorkerThreads > 0, + "num_worker_threads must be positive, got ", + numWorkerThreads); + + if (this->transports.has_value()) { + for (const std::string& transportName : this->transports.value()) { + TORCH_CHECK( + TensorPipeTransportRegistry()->Has(transportName), + "Unknown transport: ", + transportName); + } + } + + if (this->channels.has_value()) { + for (const std::string& channelName : this->channels.value()) { + TORCH_CHECK( + TensorPipeChannelRegistry()->Has(channelName), + "Unknown channel: ", + channelName); + } + } + } + + void setDeviceMap(const std::string& workerName, const DeviceMap& deviceMap) { + auto iter = deviceMaps.find(workerName); + if (iter == deviceMaps.end()) { + deviceMaps[workerName] = deviceMap; + } else { + for (auto& entry : deviceMap) { + // c10::Device has no default constructor, hence map[device] doesn't + // work In C++-17 we can use insert_or_assign. + auto entryIter = iter->second.find(entry.first); + if (entryIter == iter->second.end()) { + iter->second.emplace(entry.first, entry.second); + } else { + entryIter->second = entry.second; + } + } + } + } + + int numWorkerThreads; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::optional> transports; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::optional> channels; + std::unordered_map deviceMaps; + std::vector devices; +}; + +// Struct to track the network source metrics +struct TORCH_API NetworkSourceInfo { + worker_id_t srcRank; + std::vector srcMachineAddr; +}; + +// Struct to track aggregated network metrics +struct TORCH_API AggregatedNetworkData { + uint64_t numCalls{0}; + uint64_t totalSentBytes{0}; + uint64_t totalRecvBytes{0}; + uint64_t totalErrors{0}; +}; + +// TensorPipeAgent leverages TensorPipe (https://github.com/pytorch/tensorpipe) +// to transparently move tensors and payloads through the fastest available +// transport or channel. It acts like a hybrid RPC transport, providing shared +// memory (linux) and TCP (linux & mac) support. CUDA support is in progress. +class TORCH_API TensorPipeAgent : public RpcAgent { + public: + TensorPipeAgent( + const c10::intrusive_ptr<::c10d::Store>& store, + std::string selfName, + worker_id_t selfId, + std::optional worldSize, + TensorPipeRpcBackendOptions opts, + std::unordered_map reverseDeviceMaps, + std::vector devices, + std::unique_ptr cb); + + TensorPipeAgent(const TensorPipeAgent&) = delete; + TensorPipeAgent& operator=(const TensorPipeAgent&) = delete; + + c10::intrusive_ptr send( + const WorkerInfo& to, + c10::intrusive_ptr message, + const float rpcTimeoutSeconds = kUnsetRpcTimeout, + const DeviceMap& deviceMap = {}) override; + + // join() and sync() would be deprecated - + // https://github.com/pytorch/pytorch/issues/27647 + void join(bool shutdown = false, float timeout = 0) override; + void sync() override {} + void startImpl() override; + void shutdownImpl() override; + + ~TensorPipeAgent() override; + + const WorkerInfo& getWorkerInfo(const std::string& workerName) const override; + const WorkerInfo& getWorkerInfo(worker_id_t workerId) const override; + std::vector getWorkerInfos() const override; + void updateGroupMembership( + const WorkerInfo& workerInfo, + const std::vector& devices, + const std::unordered_map& reverseDeviceMaps, + bool isJoin); + + std::unordered_map getMetrics() override; + + void addGilWaitTime(const std::chrono::microseconds gilWaitTime) override; + + TensorPipeRpcBackendOptions getBackendOptions() const; + + const c10::intrusive_ptr<::c10d::Store> getStore() const; + + DeviceMap getDeviceMap(const WorkerInfo& dest) const override; + + const std::vector& getDevices() const override; + + using NetworkDataDict = + std::unordered_map; + + // Returns metrics tracked by the NetworkDataDict + NetworkDataDict getNetworkData(); + // Returns NetworkSourceInfo struct + NetworkSourceInfo getNetworkSourceInfo(); + + static const std::string& guessAddress(); + + // For testing purposes. + size_t timeoutMapSize(); + size_t numPendingResponses(); + size_t messageIdToTimeoutMapSize(); + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isStaticGroup_; + + protected: + // TensorPipe write function that could be used to write response + // messages by server, and write request messages by client. This + // is a protected method since it is overwritten by FaultyTensorPipeAgent + virtual void pipeWrite( + const std::shared_ptr& /*pipe*/, + const c10::intrusive_ptr& message, + std::vector&& devices, + std::vector streams, + std::function /*fn*/) noexcept; + + private: + // Removes the given messageId with the given expirationTime from the + // timeoutMap_. + void removeFromTimeoutMap(uint64_t messageId); + + // Populates workerIdToInfo_ and workerNameToInfo_ using addressStore_ + void prepareNames(bool isStaticGroup); + + // Check the static group attribute with the value set in store + void checkAndSetStaticGroup(const c10::intrusive_ptr<::c10d::Store>& store); + + const std::string& findWorkerURL(const WorkerInfo& worker) const; + + // Only use for Dynamic RPC groups, method to have worker leave group + void leaveGroup(); + + // TensorPipe read function that could be used to read response messages + // by client, and read request messages by server. + void pipeRead( + const std::shared_ptr& /*pipe*/, + std::function, + std::vector)> /*fn*/) noexcept; + + // Callback of listener accept() + void onListenerAccepted( + const tensorpipe::Error& error, + std::shared_ptr& pipe); + + // Respond to a call from a peer + void respond(std::shared_ptr& pipe); + + void sendCompletedResponseMessage( + std::shared_ptr& pipe, + JitFuture& futureResponseMessage, + uint64_t messageId, + std::vector stream); + + // Collects metrics from successful RPC calls + void trackNetworkData( + uint64_t requestSize, + uint64_t responseSize, + const std::string& destWorkerName); + + // Collects metrics from failed RPC calls + void trackNetworkError( + uint64_t requestSize, + const std::string& destWorkerName); + + inline std::vector getDevicesForRemote( + const std::string& remoteName, + const Message& message) const; + + // When a request+response completes, we need to mark the future message as + // complete. However, if its timeout has already expired, it already has an + // error set. There is no atomic "test-and-set" way to mark a future complete + // only if it isn't yet. It does exist for errors (setErrorIfNeeded) but, even + // then, it ends up printing a log message, which may worry the user. To solve + // both issues we use a separate atomic flag to know the status of the future. + struct AtomicJitFuture { + explicit AtomicJitFuture(const std::vector& devices) { + jitFuture = c10::make_intrusive( + at::AnyClassType::get(), devices); + } + + std::atomic_flag isComplete = ATOMIC_FLAG_INIT; + c10::intrusive_ptr jitFuture; + }; + + // Maintains state per client pipe to track pending response messages and + // error states. pendingResponseMessage_ should be protected by a mutex since + // it can be raced with user send() call. + // TODO: To achieve better performance we can have a pipe pool per + // client that can be configured using RpcBackendOptions. + struct ClientPipe { + explicit ClientPipe(std::shared_ptr pipe) + : pipe_(std::move(pipe)) {} + std::shared_ptr pipe_; + mutable std::mutex mutex_; + bool inError_{false}; + // Map from Message Request ID's to corresponding futures. + std::unordered_map> + pendingResponseMessage_; + }; + + const c10::intrusive_ptr<::c10d::Store> store_; + + const TensorPipeRpcBackendOptions opts_; + // For dynamic RPC, the reverse device maps are updated whenever a new rank + // joins or leaves the group + std::unordered_map reverseDeviceMaps_; + // Local devices used by this agent. If application didn't specify this + // field, it will be initialized using corresponding local devices in + // opts_.deviceMaps and reverseDeviceMaps_; + std::vector devices_; + + ThreadPool threadPool_; + std::shared_ptr context_; + std::shared_ptr listener_; + + mutable std::mutex connectedPipesMutex_; + std::unordered_map connectedPipes_; + + // Maps keyed on name and id for easy WorkerInfo lookup. + std::unordered_map workerIdToInfo_; + std::unordered_map workerNameToInfo_; + std::unordered_map workerNameToURL_; + + ::c10d::PrefixStore rankToNameStore_; + ::c10d::PrefixStore nameToAddressStore_; + // Store keys that will used to count joined processes and active calls during + // the shutdown process + ::c10d::PrefixStore shutdownStore_; + int worldSize_ = 0; + std::atomic nextMessageID_{0}; + + // Metadata used for tracking of whether certain RPCs have timed out or not. + struct TimeoutMessageMetadata { + TimeoutMessageMetadata( + uint64_t messageId_, + std::shared_ptr responseFuture_, + std::chrono::milliseconds timeout_) + : messageId(messageId_), + responseFuture(std::move(responseFuture_)), + timeout(timeout_) {} + uint64_t messageId; + std::shared_ptr responseFuture; + std::chrono::milliseconds timeout; + }; + + // Map to store the expiration times for each message. + std::map> + timeoutMap_; + + // Map to store the messageId to expiry time. + std::unordered_map messageIdToTimeout_; + + // Thread that will poll the timeoutMap_ for timed out messages and mark them + // with an error accordingly + std::thread timeoutThread_; + + // Function run by the timeoutThread_ to check for timed out RPCs + void pollTimeoutRpcs(); + + // Mutex to guard the timeoutMap_ + std::mutex timeoutMapMutex_; + + // Condition Variable to signal population of the timeoutMap_ + std::condition_variable timeoutThreadCV_; + + // Returns the expiration time for an RPC by adding the current time to the + // passed in timeout. + inline steady_clock_time_point computeRpcMessageExpiryTime( + std::chrono::milliseconds timeout) const { + return std::chrono::time_point_cast( + std::chrono::steady_clock::now() + timeout); + } + + // Handle error on an outgoing pipe + void handleClientError( + ClientPipe& clientPipe, + const tensorpipe::Error& error); + + // This is a generic struct for capturing Time-Series Metrics. It keeps a + // running sum and count of data points (observations), and can return an + // average of the data points seen so far. This is currently only used for + // tracking the GIL Wait Time in RPC Agents, but can be used for other metrics + // as well. + struct TimeSeriesMetricsTracker { + // Running sum of the data points seen so far + uint64_t currentSum_; + // Running count of the data points seen so far + uint64_t currentCount_; + + explicit TimeSeriesMetricsTracker( + uint64_t currentSum = 0, + uint64_t currentCount = 0); + + // Adds a data point (which is basically one observation for the metric + // being tracked) to the running sum and count. + void addData(uint64_t dataPoint); + // Returns the average of all the data points seen so far. + float computeAverage() const; + }; + + // Map of Time-Series metrics tracked by the RPC Agent + std::unordered_map timeSeriesMetrics_; + // Mutex to guard timeSeriesMetrics_ + std::mutex metricsMutex_; + + // Custom lock guard used to check if the RPC group is dynamic and lock the + // mutex if so + struct GroupMembershipLockGuard { + GroupMembershipLockGuard(std::mutex& mutex, bool isStaticGroup) + : ref_(mutex), isStaticGroup_(isStaticGroup) { + if (isStaticGroup_) { + ref_.lock(); + } + } + + ~GroupMembershipLockGuard() { + if (isStaticGroup_) { + ref_.unlock(); + } + } + + GroupMembershipLockGuard(const GroupMembershipLockGuard&) = delete; + + private: + std::mutex& ref_; + bool isStaticGroup_; + }; + // Mutex to guard access to group membership data + // e.g. updates to (workerIdToInfo_, workerNameToInfo_, workerNameToURL_) + mutable std::mutex groupMembershipMutex_; + + // Map to Track Network Data + NetworkDataDict networkData_; + // Mutex to guard networkData_ + std::mutex networkDataMutex_; + + // A mutex and a cv to guard access to the call counts and watch for changes. + std::mutex callCountMutex_; + std::condition_variable callCountCV_; + // Running total of un-processed, un-errored RPC calls sent + int32_t clientActiveCalls_{0}; + // Running total of un-processed RPC requests received + int32_t serverActiveCalls_{0}; + // Running total of RPC requests that will be completed asynchronously + int32_t serverActiveAsyncCalls_{0}; + + // Whether a global graceful shutdown has begun, in which case we'll silence + // error messages due to remote workers closing their pipes. + std::atomic shuttingDown_{false}; + + // Helpers to modify the counts while correctly dealing with the mutex and cv. + void increaseCallCount(int32_t& count); + void decreaseCallCount(int32_t& count); + + // Helpers to set the state of the requests. + void markFutureAsComplete( + std::shared_ptr atomicFuture, + c10::intrusive_ptr message, + std::vector streams); + void markFutureWithError( + std::shared_ptr atomicFuture, + std::string errorMsg); +}; + +} // namespace torch::distributed::rpc + +#endif // USE_TENSORPIPE + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_utils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..025e143190c2df7c2898f03187e70064619bbf3c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_utils.h @@ -0,0 +1,124 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_TENSORPIPE + +#include + +namespace tensorpipe { +class Message; +class Allocation; +class Descriptor; +} // namespace tensorpipe + +namespace torch::distributed::rpc { + +TORCH_API const c10::Stream& getStreamForDevice( + const std::vector& streams, + const c10::Device& device); + +// Inspired by c10/core/impl/DeviceGuardImplInterface.h. + +class TensorpipeDeviceTypeConverter { + public: + // Ideally we'd want this to also return a tensorpipe::Message::Tensor object + // but we cannot forward-declare that class (because it's nested), and we + // cannot include the TensorPipe headers because it's a private dependency. + // Thus we bend over backwards and entrust this method with appending that + // object to the `tensors` field of the tensorpipe::Message object we pass. + virtual std::optional> prepareTensorForSending( + const c10::Storage& storage, + const std::vector& streams, + tensorpipe::Message& message) const = 0; + + // Same as above: this method cannot return a tensorpipe::Allocation::Tensor, + // thus it appends it to the `tensors` field of the tensorpipe::Allocation. + virtual at::DataPtr allocateTensorForReceiving( + c10::DeviceIndex deviceIndex, + size_t length, + const std::vector& streams, + tensorpipe::Allocation& allocation) const = 0; + + virtual ~TensorpipeDeviceTypeConverter() = default; +}; + +extern TORCH_API std::array< + std::atomic, + static_cast(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)> + device_type_converter_registry; + +class TORCH_API TensorpipeDeviceTypeConverterRegistrar { + public: + TensorpipeDeviceTypeConverterRegistrar( + DeviceType /*type*/, + const TensorpipeDeviceTypeConverter* /*impl*/); +}; + +#define C10_REGISTER_TENSORPIPE_DEVICE_TYPE_CONVERTER( \ + DevType, TensorpipeDeviceTypeConverter) \ + static ::torch::distributed::rpc::TensorpipeDeviceTypeConverterRegistrar \ + C10_ANONYMOUS_VARIABLE(g_##DeviceType)( \ + ::c10::DeviceType::DevType, new TensorpipeDeviceTypeConverter()); + +inline const TensorpipeDeviceTypeConverter* getDeviceTypeConverter( + DeviceType type) { + return device_type_converter_registry[static_cast(type)].load(); +} + +// A struct that holds pointers that keep alive all the memory that will be +// accessed by TensorPipe during a write operation. +struct TensorpipeWriteBuffers { + // Allocate on heap so pointers stay valid as we move the holder. + std::unique_ptr type; + std::unique_ptr id; + std::vector payload; + std::vector pickle; + // This contains the original tensors and the clones of the sparse tensors. + std::vector tensors; + // This contains the copies of the data of the tensors that didn't own their + // memory, e.g., the ones created from torch::from_blob() with no deleter. + std::vector> copiedTensors; +}; + +// A struct that holds pointers that keep alive all the memory that will be +// accessed by TensorPipe during a read operation. +struct TensorpipeReadBuffers { + // Allocate on heap so pointers stay valid as we move the holder. + std::unique_ptr type; + std::unique_ptr id; + std::vector payload; + std::vector pickle; + std::vector tensors; +}; + +// Convert an RPC message into a TensorPipe message, plus a holder to all the +// data that must be kept alive while the write is performed asynchronously. +TORCH_API std::tuple +tensorpipeSerialize( + const c10::intrusive_ptr& rpcMessage, + std::vector devices, + const std::vector& streams); + +// Allocate the buffers that will hold the incoming data. They will be managed +// by the returned holder, which must be kept alive until the asynchronous read +// has finished. Pointers to these buffers will be stored in the returned +// tensorpipe::Allocation struct. +TORCH_API std::pair +tensorpipeAllocate( + const tensorpipe::Descriptor& tpDescriptor, + const std::vector& streams); + +// Convert a TensorPipe message back into an RPC message. This requires the data +// to be available and can thus only be performed once the asynchronous read has +// completed. The holder can be destroyed once this function returns. +TORCH_API c10::intrusive_ptr tensorpipeDeserialize( + const tensorpipe::Descriptor& tpDescriptor, + TensorpipeReadBuffers&& holder); + +} // namespace torch::distributed::rpc + +#endif // USE_TENSORPIPE + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/faulty_tensorpipe_agent.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/faulty_tensorpipe_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..c0adb349f2095a721970b5bfdd9acd7141abe827 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/faulty_tensorpipe_agent.h @@ -0,0 +1,109 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_TENSORPIPE + +#include +#include + +namespace torch::distributed::rpc { + +struct TORCH_API FaultyTensorPipeRpcBackendOptions + : public TensorPipeRpcBackendOptions { + FaultyTensorPipeRpcBackendOptions( + int num_worker_threads, + float rpc_timeout, + std::string init_method, + std::vector messages_to_fail, + std::unordered_map messages_to_delay, + int num_fail_sends = 0) + : TensorPipeRpcBackendOptions( + num_worker_threads, + std::optional>(), + std::optional>(), + rpc_timeout, + std::move(init_method)), + messagesToFail(std::move(messages_to_fail)), + messagesToDelay(std::move(messages_to_delay)), + numFailSends(num_fail_sends) { + TORCH_CHECK(numFailSends >= 0, "numFailSends should be non-negative"); + } + + std::vector messagesToFail; + std::unordered_map messagesToDelay; + int numFailSends; +}; + +class TORCH_API FaultyTensorPipeAgent : public TensorPipeAgent { + public: + FaultyTensorPipeAgent( + const c10::intrusive_ptr<::c10d::Store>& store, + std::string selfName, + worker_id_t selfId, + int worldSize, + FaultyTensorPipeRpcBackendOptions opts, + std::unordered_map reverseDeviceMaps, + std::vector devices, + std::unique_ptr callback); + + // Faulty send function for this class. + c10::intrusive_ptr send( + const WorkerInfo& to, + c10::intrusive_ptr message, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + const DeviceMap& deviceMap = {}) override; + + // Add delay to writes + void pipeWrite( + const std::shared_ptr& pipe, + const c10::intrusive_ptr& rpcMessage, + std::vector&& devices, + std::vector streams, + std::function fn) noexcept override; + + protected: + // This function checks the messageTypesToFail_ to determine whether to use + // the faulty send or not. + bool shouldFailMessage(MessageType type) const; + + private: + // This function parses the list of strings passed in by the python tests and + // resolves the Message Types that must use the faulty send. + std::vector parseMessagesToFailInput( + const std::vector& messagesToFail) const; + + // Returns amount of time in seconds to delay sending of the given message + // type. + float getDelayForMessage(MessageType type) const; + + // Parse message types that we should inject arbitrary delays for. + std::unordered_map> parseMessagesToDelay( + const std::unordered_map& messageTypesToDelay) const; + + // Number of sends to intentionally fail before allowing one to succeed. + const int numFailSends_; + + // Vector of the MessageTypes that we must use the faulty send for. This is + // parsed based on a list of strings passed in by the python tests. + const std::vector messageTypesToFail_; + + // Mapping of message types to amount we should delay send for in the ::send() + // function. + std::unordered_map> messageTypesToDelay_; + + // Map to track the number of sends we've failed for each RPC. + std::unordered_map failMessageCountMap_; + + // Mutex to guard failMessageCountMap_ + std::mutex failMapMutex_; + + MessageType messageStringToType(const std::string& messageString) const; +}; + +} // namespace torch::distributed::rpc + +#endif // USE_TENSORPIPE + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/testing.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/testing.h new file mode 100644 index 0000000000000000000000000000000000000000..baf94a5397fe21394cf9ab024800a29cbc4fe9d6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/testing.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc::testing { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::rpc::testing + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/torchscript_functions.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/torchscript_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..37c6975d05559d13de07463d279ea11cab121f79 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/torchscript_functions.h @@ -0,0 +1,42 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// This function sends an rpc call to run torchscript function, currently the +// torchscript function could only be a user defined python function with +// "@torch.jit.script" annotation. The torchscript function could not be +// a class constructor, class method, instance method or a script module. +// dst: destination worker name +// qualifiedName: torchscript function qualified name string like +// "moduleName::torchscriptFunctionName", e.g, +// "dist_autograd_test::my_py_add" +// stack: a bag of IValue args passed to torchscriptFunctionName +// It returns c10::intrusive_ptr +c10::intrusive_ptr TORCH_API rpcTorchscript( + const std::string& dstWorkerName, + const c10::QualifiedName& qualifiedName, + const c10::FunctionSchema& functionSchema, + std::vector stack, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + const bool isAsyncExecution = false); + +c10::intrusive_ptr TORCH_API remoteTorchscript( + const std::string& dstWorkerName, + const c10::QualifiedName& qualifiedName, + const c10::FunctionSchema& functionSchema, + std::vector& stack, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + const bool isAsyncExecution = false); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/types.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/types.h new file mode 100644 index 0000000000000000000000000000000000000000..f8ec54b86e986a64fd2f55d2c35e1d5c594f30f4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/types.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc { + +using worker_id_t = int16_t; +using local_id_t = int64_t; + +bool getAllowJitRRefPickle(); +TORCH_API void enableJitRRefPickle(); +TORCH_API void disableJitRRefPickle(); + +struct TORCH_API JitRRefPickleGuard { + JitRRefPickleGuard(); + JitRRefPickleGuard(JitRRefPickleGuard&& other) = delete; + JitRRefPickleGuard(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(JitRRefPickleGuard&&) = delete; + ~JitRRefPickleGuard(); +}; + +struct TORCH_API GloballyUniqueId final { + GloballyUniqueId(worker_id_t createdOn, local_id_t localId); + GloballyUniqueId(const GloballyUniqueId& other) = default; + GloballyUniqueId& operator=(const GloballyUniqueId& other) = delete; + GloballyUniqueId(GloballyUniqueId&& other) = default; + GloballyUniqueId& operator=(GloballyUniqueId&& other) = delete; + ~GloballyUniqueId() = default; + + bool operator==(const GloballyUniqueId& other) const; + bool operator!=(const GloballyUniqueId& other) const; + + at::IValue toIValue() const; + static GloballyUniqueId fromIValue(const at::IValue& /*ivalue*/); + + struct Hash { + size_t operator()(const GloballyUniqueId& key) const { + return (uint64_t(key.createdOn_) << kLocalIdBits) | key.localId_; + } + }; + + static constexpr int kLocalIdBits = 48; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t createdOn_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const local_id_t localId_; +}; + +TORCH_API std::ostream& operator<<( + std::ostream& os, + const GloballyUniqueId& globalId); + +using RRefId = GloballyUniqueId; +using ForkId = GloballyUniqueId; +using ProfilingId = GloballyUniqueId; + +struct TORCH_API SerializedPyObj final { + SerializedPyObj(std::string&& payload, std::vector&& tensors) + : payload_(std::move(payload)), tensors_(std::move(tensors)) {} + + std::vector toIValues() &&; + static SerializedPyObj fromIValues(std::vector value); + + std::string payload_; + std::vector tensors_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_call.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_call.h new file mode 100644 index 0000000000000000000000000000000000000000..da76292342019059afd7d1c868c7cac1e375fd88 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_call.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::rpc { + +// This class converts the content in a PythonCall into py::object. This is a +// helper class to make sure that all arguments deserialization is done before +// entering RequestCallbackImpl::processRpc(...), so that the deserialization +// related logic can be carried out in one spot instead of scattered in multiple +// places for different message types. +// NB: The reason for not consolidating class into PythonCall is because +// PythonCall is a libtorch type which should not depend on Python types. +class TORCH_API UnpickledPythonCall : public RpcCommandBase { + public: + UnpickledPythonCall( + const SerializedPyObj& serializedPyObj, + bool isAsyncExecution); + ~UnpickledPythonCall() override; + + // toMessage() method is not implemented, as objects of this class should + // never be directly converted into a Message object. + c10::intrusive_ptr toMessageImpl() && override; + const py::object& pythonUdf() const; + + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + private: + py::object pythonUdf_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_remote_call.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_remote_call.h new file mode 100644 index 0000000000000000000000000000000000000000..afe8a977a615e59b2f77e180275e9fc0e6adc92b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_remote_call.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// This class converts the content in a PythonRemoteCall into py::object. This +// is a helper class to make sure that all arguments deserialization is done +// before entering RequestCallbackImpl::processRpc(...), so that the +// deserialization related logic can be carried out in one spot instead of +// scattered in multiple places for different message types. +// NB: The reason for not consolidating class into PythonRemoteCall is because +// PythonRemoteCall is a libtorch type which should not depend on Python types. +class TORCH_API UnpickledPythonRemoteCall final : public UnpickledPythonCall { + public: + explicit UnpickledPythonRemoteCall( + const SerializedPyObj& serializedPyObj, + const at::IValue& retRRefId, + const at::IValue& retForkId, + const bool isAsyncExecution); + + const RRefId& rrefId() const; + const ForkId& forkId() const; + + private: + RRefId rrefId_; + ForkId forkId_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/utils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..324d76b2e4dc48b6b12b593fa58c198daa723ad9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/utils.h @@ -0,0 +1,91 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Parse error message and return RPCErrorType based on the message. +TORCH_API RPCErrorType getRPCErrorType(const JitFuture& jitFuture); +// Create an error string given the error description and error type +TORCH_API std::string makeRPCError( + const std::string& rpcErrorStr, + RPCErrorType errorType); + +// Given an RPC message received as a request over the wire, deserialize it into +// the appropriate 'RpcCommandBase' type. +TORCH_API std::unique_ptr deserializeRequest( + const Message& request); + +// Given an RPC message received as a response over the wire, deserialize it +// into the appropriate 'RpcCommandBase' type, if the response is +// FORWARD_AUTOGRAD_RESP type, unwrap it, attach recvBackward() functions +// to received tensors and set the wrappedMsgType to its wrapped message type. +TORCH_API std::unique_ptr deserializeResponse( + const Message& response, + MessageType& wrappedMsgType); + +// Given an RPC message received as a response over the wire, deserialize it +// into the valid IValue if the message is for a script rpc result, +// otherwise deserialize it into dummy none ivalue that will never be used. +// In this deserialization, we also attach recv rpc backward functions if +// needed. +IValue deserializeResptoIValueInternal( + RpcCommandBase& rpc, + MessageType messageType); +TORCH_API IValue deserializeRespToIValue(const Message& message); + +// Note: format is subject to change and intended for RPCs. +// For saving persistently to disk, use torch::save(). +TORCH_API std::string wireSerialize( + const std::vector& payload, + const std::vector& tensors); + +TORCH_API std::pair, std::vector> wireDeserialize( + const void* data, + size_t data_size); + +// We use vector as the type of blobs because it's what rpc::Message uses +// for its payload, even though it has the disadvantage that it cannot be +// allocated with uninitialized memory: it is always zeroed out. + +// Some Tensors are effectively views of larger Tensors, where only a small +// subset of the Storage data is referenced. This normally is good and avoids +// copies when kept locally, but if we naively push the whole Storage over the +// wire, we'll end up with excess network traffic. This change clones tensors if +// we'd save at least half the data, and over a minimum hurdle. +TORCH_API c10::List cloneSparseTensors( + const std::vector& tensors); + +// Combines an original payload and wrapped payload into the original payload. +// Used to generate the overall payload for the wrapped RPC. +TORCH_API void writeWrappedPayload( + std::vector& originalPayload, + std::vector& additionalPayload); + +// Reads the additional, wrapped payload from a wrapped RPC off of the input +// payload. After this, payload will contain the payload of the original, +// un-wrapped RPC. +TORCH_API std::vector readWrappedPayload( + std::vector& payload, + const rpc::Message& message); + +// Takes a list of events from autograd profiler and populates them into +// profiledEvents to be carried over RPC. +TORCH_API void populateRemoteProfiledEvents( + std::vector& profiledEvents, + const torch::autograd::profiler::ProfilerConfig& profilerConfig, + const std::vector>& + eventLists); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_data.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_data.h new file mode 100644 index 0000000000000000000000000000000000000000..75f05b99b69aca3439c9b5be53685dd65622ebcc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_data.h @@ -0,0 +1,64 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::lazy { + +class TORCH_API BackendData { + public: + struct Info { + /** + * Used by Lazy Graph Executor to tag info on BackendData objs + * */ + virtual ~Info() = default; + }; + /** + * Represents (Tensor) data stored on a backend device + * in its native format. + * */ + using Handle = int64_t; + + BackendData(BackendDevice device, Shape shape) + : device_(std::move(device)), shape_(std::move(shape)) {} + + virtual ~BackendData() = default; + + const BackendDevice& device() const { + return device_; + } + + const Shape& shape() const { + return shape_; + } + + Info* info() const { + return info_.get(); + } + + std::shared_ptr SetInfo(std::shared_ptr info) { + std::swap(info, info_); + return info; + } + + virtual Handle GetHandle() = 0; + + virtual void Assign(const BackendData& data) = 0; + + virtual bool HasValue() const = 0; + + private: + BackendDevice device_; + Shape shape_; + std::shared_ptr info_; +}; + +using BackendDataPtr = std::shared_ptr; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_device.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_device.h new file mode 100644 index 0000000000000000000000000000000000000000..b5e697035bb12196ea0d59b4b5cda3ab1fa9cee3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_device.h @@ -0,0 +1,105 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace c10 { +struct Device; +} + +namespace torch::lazy { + +// Backend should extend it and define their own supported hardware types. +struct TORCH_API BackendDeviceType { + int8_t type{(int8_t)at::kCPU}; + // Note: previous default value was '0', which actually maps to at::kCPU, at + // least now it is explicit, we may want to make default/undefined semantics + // more clear though + BackendDeviceType() : type((int8_t)at::kCPU) {} + BackendDeviceType(int8_t type) : type(type) {} + + virtual ~BackendDeviceType() = default; + virtual std::string toString() const { + return "Unknown"; + } +}; + +class TORCH_API BackendDevice { + public: + // The default constructor will set both the device type and ordinal + // to backend specific defaults. + BackendDevice(); + BackendDevice(std::shared_ptr&& type, int64_t ordinal); + + int8_t type() const; + int64_t ordinal() const { + return ordinal_; + } + + bool operator==(const BackendDevice& other) const { + return compare(other) == 0; + } + bool operator!=(const BackendDevice& other) const { + return compare(other) != 0; + } + bool operator<(const BackendDevice& rhs) const { + return compare(rhs) < 0; + } + + std::string toString() const; + + private: + int compare(const BackendDevice& rhs) const; + + // Use shared_ptr instead of unique_ptr so that BackendDevice can be copied. + std::shared_ptr type_; + int64_t ordinal_; +}; + +TORCH_API std::ostream& operator<<( + std::ostream& os, + const BackendDevice& device); + +// Helpers for converting a c10::Device to BackendDevice and vice versa. +TORCH_API BackendDevice atenDeviceToBackendDevice(const c10::Device& device); +TORCH_API c10::Device backendDeviceToAtenDevice(const BackendDevice& device); + +// Tries to extract the backend device out of the lazy tensor. Returns nullopt +// if the input is not a lazy tensor. +TORCH_API std::optional GetBackendDevice( + const at::ITensorListRef tensors); +TORCH_API std::optional GetBackendDevice( + const at::TensorList tensors); +TORCH_API std::optional GetBackendDevice( + const at::Tensor& tensor); +TORCH_API std::optional GetBackendDevice( + const std::optional& device); + +// For variadic template. +TORCH_API std::optional GetBackendDevice(); + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Winfinite-recursion") +template +std::optional GetBackendDevice( + const T& tensor, + const Args&... forward_tensors) { + auto optional_device = GetBackendDevice(tensor); + if (optional_device) { + return optional_device; + } + return GetBackendDevice(forward_tensors...); +} +C10_DIAGNOSTIC_POP() + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_interface.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..9f885d4d4c71618d6c824f197ef109b0c0c76dfc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_interface.h @@ -0,0 +1,160 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +struct IrBuilder; + +/** + * Work in progress- don't treat this as a stable interface yet! + */ +class TORCH_API BackendImplInterface { + public: + virtual ~BackendImplInterface() = default; + + /** + * Initialization/Teardown + * */ + // No-op by default. Allows custom functionality to be exposed through + // extension bindings. + virtual void InitializeAtenBindings() const {} + + virtual void PrepareToExit() const = 0; + + /** + * Configuration + * */ + + virtual void SetRngSeed(size_t seed) const = 0; + + /** + * IR Tracing + * */ + + virtual const IrBuilder* GetIrBuilder() const = 0; + + /** + * Data Transfer + * */ + + virtual BackendDataPtr MakeComputationDataFromTensor( + const at::Tensor& tensor, + const Shape& shape, + const BackendDevice& device) const = 0; + virtual BackendDataPtr MakeComputationDataFromScalar( + const at::Scalar& scalar, + const torch::lazy::BackendDevice& device) const = 0; + virtual BackendDataPtr CreateDataPlaceholder( + const BackendDevice& device, + const Shape& shape) const = 0; + + // Gets backend data if the node is a device data node. Otherwise returns + // nullptr + virtual BackendDataPtr GetComputationDataFromNode(const Node*) const = 0; + + virtual at::Tensor MakeTensorFromComputationData( + const BackendDataPtr data, + std::optional logical_scalar_type) const = 0; + + /** + * Lowering, Compilation, Execution + * */ + + virtual std::unique_ptr CreateLoweringContext( + const std::string& name, + BackendDevice device, + c10::ArrayRef post_order, + Util::EmissionMap emit_status) const = 0; + + virtual std::unique_ptr CreateLoweringContext( + const std::string& name, + BackendDevice device) const = 0; + + // TODO(whc) need to keep this? + virtual std::vector GetCompilationDevices( + const std::string& device, + c10::ArrayRef devices) const = 0; + + virtual std::vector Compile( + std::vector instances) const = 0; + + virtual std::vector ExecuteComputation( + torch::lazy::ComputationPtr computation, + c10::ArrayRef arguments, + const BackendDevice& device) const = 0; + + /** + * Device Configuration + * */ + + // Set or get the default device type. + // For backends used with virtual c10::Devices, this configures what real + // device type the backend should use, and matters if the backend supports + // more than one type of real device. + virtual std::shared_ptr GetDefaultDeviceType() const = 0; + virtual void SetDefaultDeviceType(int8_t type) = 0; + + // Set or get the default device ordinal. + // For backends that supports multi-device, this configures what the + // default device the backend should use. + virtual int64_t GetDefaultDeviceOrdinal() const = 0; + virtual void SetDefaultDeviceOrdinal(int64_t) = 0; + + // Specify which aten device should be used for eager fallback + // may change depending on current 'Default' DeviceType + virtual at::DeviceType EagerFallbackDeviceType() const = 0; + + // Query all available backend devices + virtual std::vector GetBackendDevices() const = 0; + + virtual std::string CreateMetricReport() const { + return ""; + } + + // Map a particular c10:: device to a concrete backend device + // Note:: c10:: devices may be virtual or concrete. xla:: and lazy:: are + // virtual devices, meaning they may map to a gpu, tpu, etc. behind the + // scenes. In the future, non-virtual c10:: devices may also use lazy tensors + // through a mode, in which case these APIs should still work, but should be + // identity mappings. + virtual BackendDevice GetBackendDevice(c10::Device device) const = 0; + + // TODO(whc) + // Additional APIs expected for supporting distributed training, to be + // designed + + /** + * Debug/Metrics + * */ + + // virtual std::map GetMetrics() const = 0; + + // virtual MemoryInfo GetMemoryInfo(const std::string& device) = 0; + + virtual std::string GetComputationBackendText( + const ComputationPtr computation) const = 0; +}; + +class TORCH_API BackendRegistrar { + public: + BackendRegistrar(const BackendImplInterface* backend_impl_interface); +}; + +TORCH_API bool hasBackend(); +TORCH_API const BackendImplInterface* getBackend(); + +TORCH_API const IrBuilder* getIrBuilder(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/lowering_context.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/lowering_context.h new file mode 100644 index 0000000000000000000000000000000000000000..8de72ec167fdd9d27412ba13b8ac143e16d2c0b1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/lowering_context.h @@ -0,0 +1,115 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace torch::lazy { + +class TORCH_API Computation { + public: + virtual int parameters_size() const = 0; + + virtual const std::vector& parameter_shapes() const = 0; + + virtual const std::vector& parameter_names() const = 0; + + virtual const Shape& result_shape() const = 0; + + virtual const std::string to_string() const = 0; + + virtual ~Computation() = default; + + // Indicates whether this computation is being executed inside a mark step + // Assume false unless set otherwise + bool in_mark_step = false; +}; + +using ComputationPtr = std::shared_ptr; + +// Keeps track of the code generation state. +class TORCH_API LoweringContext { + public: + LoweringContext(const std::string& name, BackendDevice device); + LoweringContext( + const std::string& name, + BackendDevice device, + c10::ArrayRef post_order, + Util::EmissionMap emit_status); + + virtual ~LoweringContext() = default; + + static std::unique_ptr Create( + const std::string& name, + BackendDevice device, + c10::ArrayRef post_order, + Util::EmissionMap emit_status); + + static std::unique_ptr Create( + const std::string& name, + BackendDevice device); + + const BackendDevice& device() const { + return device_; + } + + // Retrieves the vector holding all the tensors associated with the parameter + // instructions which have been created. + const std::vector& GetParametersData() const; + + // Adds a new input/output alias. + virtual void SetUpAlias( + const std::vector& output_index, + int64_t param_number, + const std::vector& param_index, + bool must_alias = false) { + // Dummy default implementation to do nothing. + } + + // Check if parameter shape matches result at index. + virtual bool CheckResultShape( + const BackendDataPtr& parameter_data, + size_t result_idx) { + // Dummy default implementation to do nothing. + return false; + } + + // Adds the given output as a component of the result tuple and returns its + // assigned position within the tuple. + virtual size_t AddResult(const torch::lazy::Output& output) = 0; + + // Associates the given output with the input parameter of the given index and + // shape. Only used for the operator-by-operator execution, mostly for + // debugging purposes. + virtual void AddParameter( + const torch::lazy::Output& output, + size_t index, + const Shape& shape, + const std::string& name) = 0; + + // Build the computation capturing all the operations created with the + // embedded builder (returned by the builder() API). + virtual ComputationPtr Build() = 0; + + size_t GetEmittedNodeCount() const { + return emit_status_.size(); + } + + protected: + BackendDevice device_; + std::vector parameters_; + std::vector parameter_sequence_; + Util::EmissionMap emit_status_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/cache.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/cache.h new file mode 100644 index 0000000000000000000000000000000000000000..a34161654e64d8277e0af2508360dcaa9aa782b8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/cache.h @@ -0,0 +1,148 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * Cache utils in this file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/cache.h + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +// Generic key and object cache with LRU expiration policy. The objects of type +// T will be stored as std::shared_ptr and taken and returned as such, by the +// cache API. +template < + typename K, + typename T, + typename H = std::hash, + typename E = std::equal_to> +class Cache { + public: + using TypePtr = std::shared_ptr; + using Element = std::pair; + + explicit Cache(size_t max_size) : max_size_(max_size) {} + + // Adds an object to the cache, unless it already exists. If the cache grows + // beyond the limit set during construction, the oldest used object will be + // removed from the cache. + TypePtr Add(K key, TypePtr object) { + if (!max_size_) { + return object; + } + std::lock_guard slock(lock_); + element_list_.emplace_front(Element(std::move(key), std::move(object))); + auto it = element_list_.begin(); + auto emplace_result = element_map_.emplace(&it->first, it); + if (!emplace_result.second) { + element_list_.erase(it); + DoLRU(emplace_result.first->second); + } else if (element_list_.size() > max_size_) { + Element* last = &element_list_.back(); + element_map_.erase(&last->first); + element_list_.pop_back(); + } + return emplace_result.first->second->second; + } + + // Retrieves the existing object if it exists. If it does, its position in + // the LRU list gets moved to the head of the list. + // Returns nullptr if no object with the specified key is found within the + // cache. + TypePtr Get(const K& key) { + if (!max_size_) { + return nullptr; + } + std::lock_guard slock(lock_); + auto it = element_map_.find(&key); + if (it == element_map_.end()) { + return nullptr; + } + DoLRU(it->second); + return it->second->second; + } + + TypePtr GetLatest() { + std::lock_guard g(lock_); + TORCH_CHECK(!element_list_.empty()); + return element_list_.front().second; + } + + bool Erase(const K& key) { + if (!max_size_) { + return false; + } + std::lock_guard slock(lock_); + auto it = element_map_.find(&key); + if (it == element_map_.end()) { + return false; + } + auto lit = it->second; + element_map_.erase(it); + element_list_.erase(lit); + return true; + } + + void Clear() { + if (!max_size_) { + return; + } + std::lock_guard slock(lock_); + element_map_.clear(); + element_list_.clear(); + } + + int Numel() const { + if (!max_size_) { + return 0; + } + std::lock_guard g(lock_); + TORCH_CHECK(element_map_.size() == element_list_.size()); + return element_map_.size(); + } + + private: + using ElementList = std::list; + + struct Hasher { + size_t operator()(const K* key) const { + return hasher(*key); + } + + H hasher; + }; + + struct Equaler { + bool operator()(const K* k1, const K* k2) const { + return equaler(*k1, *k2); + } + + E equaler; + }; + + using ElementMap = std:: + unordered_map; + + void DoLRU(typename ElementList::iterator it) { + element_list_.splice(element_list_.begin(), element_list_, it); + } + + mutable std::mutex lock_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const size_t max_size_ = 0; + ElementList element_list_; + ElementMap element_map_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/config.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/config.h new file mode 100644 index 0000000000000000000000000000000000000000..83ff42b145fa390c600e4f005c29c1993fdbbff9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/config.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +TORCH_DECLARE_bool(torch_lazy_ir_debug); +TORCH_DECLARE_bool(torch_lazy_handle_special_scalars); +TORCH_DECLARE_bool(torch_lazy_all_numbers_special_scalars); +TORCH_DECLARE_bool(torch_lazy_param_aliasing); +TORCH_DECLARE_bool(torch_lazy_reuse_ir); +TORCH_DECLARE_bool(torch_lazy_use_thread_pool); +TORCH_DECLARE_bool(torch_lazy_enable_device_data_cache); + +TORCH_DECLARE_int(torch_lazy_compilation_cache_size); +TORCH_DECLARE_int(torch_lazy_device_data_cache_size); +TORCH_DECLARE_int(torch_lazy_io_thread_pool_size); +TORCH_DECLARE_int(torch_lazy_metrics_samples); +TORCH_DECLARE_int(torch_lazy_trim_graph_check_frequency); +TORCH_DECLARE_int(torch_lazy_trim_graph_size); + +TORCH_DECLARE_string(torch_lazy_metrics_percentiles); + +TORCH_DECLARE_int(torch_lazy_shape_cache_size); + +namespace torch::lazy { +TORCH_API std::string& getLTCForceFallback(); +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/debug_util.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/debug_util.h new file mode 100644 index 0000000000000000000000000000000000000000..7c3ce3171ce299d10a487dc5f85a9861d7d123c6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/debug_util.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::lazy { + +TORCH_API std::function()>& +GetPythonFramesFunction(); + +TORCH_API std::string GetFirstUserFrameInPython(); + +class TORCH_API DebugUtil { + public: + enum GraphFormat { + kText, + kDot, + kBackend, + }; + + static GraphFormat GetDefaultGraphFormat(); + + // Dumps the current Python frame and the IR Graph whose roots are the IR + // values held at the tensors. If indices is not nullptr, it selects the + // indices of the tensors whose graph will be emitted. + static std::string GetTensorsGraphInfo( + c10::ArrayRef tensors, + const std::vector* indices, + GraphFormat format = GetDefaultGraphFormat()); + + // If the environment variable LTC_SAVE_TENSORS_FILE is set to the proper + // output path, an instance of the report returned by GetTensorsGraphInfo() is + // saved. + static void SaveTensorsGraphInfo( + const char* name, + c10::ArrayRef tensors, + const std::vector* indices, + GraphFormat format = GetDefaultGraphFormat()); + + static bool ExperimentEnabled(const std::string& name); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/dynamic_ir.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/dynamic_ir.h new file mode 100644 index 0000000000000000000000000000000000000000..d21d6feb0ba58ad5e17f7e92b1e14c4334c3475e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/dynamic_ir.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +/** + * The goal of "dynamic" Nodes is to patch a hole in our tracing. + * Previously, if a user called `sizes` on a Tensor, it would leak out + * of our tracing system, as `sizes` returns a torch.Size or an int. To + * prevent this from happening, we introduce DimensionNode, a new type + * of Node that abstracts the operation of getting the dimensions of a + * Tensor. + * + * Consider the following example: + * ``` + * numel = x.shape()[0] * x.shape()[1] + * ``` + * + * Here, `x.shape()[i]` will be a SizeNode (subclass of DimensionNode), + * and the multiplication of the two SizeNodes will be represented by + * a SizeMul (also a subclass of DimensionNode). Through this, we can + * prevent `numel` from being represented as a Python int and thus + * burned into the Graph. + */ + +class TORCH_API DimensionNode { + public: + virtual bool isSymbolic() const { + return false; + } + virtual int64_t getDynamicValue() const { + TORCH_CHECK(false, "NYI"); + } + virtual int64_t getStaticValue() const { + TORCH_CHECK(false, "NYI"); + } + virtual ~DimensionNode() = default; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/hash.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/hash.h new file mode 100644 index 0000000000000000000000000000000000000000..f224aae7bf62257f83e4e5db89c843a9323777a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/hash.h @@ -0,0 +1,247 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * Hash utils in this file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/util.h + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +using size_t = std::size_t; + +class TORCH_API hash_t : public c10::uint128 { + public: + // Switch from typedef hash_t = uint128 to provide explicit casters + hash_t(int8_t val) : uint128(static_cast(val)) {} + hash_t(int16_t val) : uint128(static_cast(val)) {} + hash_t(int32_t val) : uint128(static_cast(val)) {} + hash_t(int64_t val) : uint128(static_cast(val)) {} + hash_t(uint32_t val) : uint128(val) {} + hash_t(uint64_t val) : uint128(val) {} + hash_t(uint128 val) : uint128(val) {} + hash_t(uint64_t top, uint64_t bottom) : uint128(top, bottom) {} + hash_t() = default; +}; + +// Std* functions use 64-bit hash +size_t TORCH_API StdDataHash(const void* data, size_t size); + +size_t TORCH_API StdHashCombine(uintmax_t a, uintmax_t b); + +// Other functions are all 128-bit +hash_t TORCH_API HashBlock(const void* data, size_t n, const hash_t& seed); + +hash_t TORCH_API DataHash(const void* data, size_t size); + +hash_t TORCH_API HashCombine(const hash_t& a, const hash_t& b); + +size_t TORCH_API HashReduce(const hash_t& a); + +// Returns a string representation of a hash +std::string TORCH_API HashToString(const hash_t& a); + +struct HashReducer { + size_t operator()(const hash_t& value) const { + return HashReduce(value); + } +}; + +static inline hash_t StringHash(const char* data) { + return DataHash(data, std::strlen(data)); +} + +// Automatic templated implementation for 'arithmetic' types +template >* = nullptr> +hash_t Hash(const T& value) { + return DataHash(&value, sizeof(value)); +} + +// added because on macos builds the vector specialization +// breaks falling through to the templated arithmetic types above +hash_t TORCH_API Hash(const std::vector& value); + +// Specialized implementations for proprietary types +static inline hash_t Hash(const c10::ScalarType& value) { + return DataHash(&value, sizeof(value)); +} + +static inline hash_t Hash(const c10::MemoryFormat& value) { + return DataHash(&value, sizeof(value)); +} + +static inline hash_t Hash(const c10::DeviceType& value) { + return DataHash(&value, sizeof(value)); +} + +static inline hash_t Hash(const c10::Device& value) { + return HashCombine(Hash(value.type()), Hash(value.index())); +} + +static inline hash_t Hash(const c10::Layout& value) { + return DataHash(&value, sizeof(value)); +} + +static inline hash_t Hash(const c10::Scalar& value) { + switch (value.type()) { + case c10::ScalarType::ComplexDouble: + return Hash(value.toComplexDouble()); + case c10::ScalarType::Double: + return Hash(value.toDouble()); + case c10::ScalarType::Long: + return Hash(value.toLong()); + case c10::ScalarType::Bool: + return Hash(value.toBool()); + default: + TORCH_INTERNAL_ASSERT(false, "Unknown scalar type.", value.type()); + } +} + +static inline hash_t TensorHash(const at::Tensor& tensor) { + at::Tensor ctensor = tensor.contiguous(); + int64_t size = ctensor.numel() * ctensor.element_size(); + switch (ctensor.scalar_type()) { + case at::ScalarType::Bool: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Byte: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Char: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Short: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Int: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Long: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Float: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Double: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::BFloat16: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::Half: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::ComplexFloat: + return DataHash(ctensor.const_data_ptr>(), size); + case at::ScalarType::ComplexDouble: + return DataHash(ctensor.const_data_ptr>(), size); + case at::ScalarType::UInt16: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::UInt32: + return DataHash(ctensor.const_data_ptr(), size); + case at::ScalarType::UInt64: + return DataHash(ctensor.const_data_ptr(), size); + default: + TORCH_INTERNAL_ASSERT( + false, "Unsupported scalar type:", ctensor.scalar_type()); + } +} + +static inline hash_t Hash(const std::string& value) { + return DataHash(value.data(), value.size()); +} + +static inline hash_t Hash(const std::string_view& value) { + return DataHash(value.data(), value.size()); +} + +static inline hash_t Hash(const at::Generator& value) { + return TensorHash(value.get_state()); +} + +// Taken from glibc's implementation of hashing optionals, +// we want to include a contribution to the hash to distinguish +// cases where one or another option was null, but we hope it doesn't +// collide with an actually scalar value. +// +// Use an arbitrary randomly-selected 64-bit integer rather than a +// small constant that we then hash at runtime so we don't have to +// repeatedly hash a constant at runtime. +// NOLINTNEXTLINE(*-narrowing-conversions) +static const int64_t kNullOpt = 0x8655d738f3678dda; + +// Hashing for std::optional types contributes to hash +// for optionals with null value, important to distinguish +// between and cases +template +hash_t Hash(const std::optional& value) { + if (value.has_value()) { + return Hash(value.value()); + } else { + return kNullOpt; + } +} + +// Hashing of containers +// Forward declare to allow hashes of vectors of vectors to work. +template +hash_t ContainerHash(const T& values); + +template +hash_t Hash(const std::vector& values) { + return ContainerHash(values); +} + +// Need a special case for std::optional? +template +hash_t Hash(const std::optional>& value) { + if (value.has_value()) { + return ContainerHash(value.value()); + } else { + return kNullOpt; + } +} + +template +hash_t Hash(const std::set& values) { + return ContainerHash(values); +} + +template +hash_t Hash(const std::pair& values) { + return HashCombine(Hash(values.first), Hash(values.second)); +} + +static inline hash_t Hash(const hash_t& value) { + return value; +} + +template +hash_t Hash(c10::ArrayRef values) { + return ContainerHash(values); +} + +template +hash_t ContainerHash(const T& values) { + hash_t h(static_cast(0x85ebca77c2b2ae63)); + for (const auto& value : values) { + h = HashCombine(h, Hash(value)); + } + return h; +} + +// Varargs hashing +template +hash_t MHash() { + return hash_t(static_cast(0x165667b19e3779f9)); +} + +template +hash_t MHash(T value, Targs... Fargs) { + return HashCombine(Hash(value), MHash(Fargs...)); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/helpers.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/helpers.h new file mode 100644 index 0000000000000000000000000000000000000000..440e880fcb76b7e60e7a383d6b820ab6a3c7ae34 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/helpers.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// TODO: Consolidate this file with util.h + +namespace torch::lazy { + +// Converts an iterable container to a vector of int64's. +template +static std::vector ToI64Vector(const S& input) { + return ToVector(input); +} + +// Creates a set of dimension by dropping the drop_dims ones. +TORCH_API std::vector DropDimensions( + c10::ArrayRef sizes, + c10::ArrayRef drop_dims); + +// Get the canonical dimension index in the [0, rank) interval. Negative +// indices are interpreted as follows: -1 is rank-1, -2 is rank-2 etc. +TORCH_API int64_t GetCanonicalDimensionIndex(int64_t dim, int64_t rank); + +// Same as above, for multiple dimensions. +TORCH_API std::vector GetCanonicalDimensionIndices( + c10::ArrayRef dimensions, + int64_t rank); + +// Returns the canonical position in the dim dimension, handling negative +// values for the position. +TORCH_API int64_t GetCanonicalPosition( + c10::ArrayRef dimensions, + int64_t dim, + int64_t pos); + +// Creates a transposition from the given input and dimensions. +TORCH_API std::vector MakeTransposePermutation( + int64_t dim0, + int64_t dim1, + int64_t rank); + +// Calculates the protomoted shape to which the input shapes should be +// broadcasted for an elementwise operation. The size of the common dimensions +// (2,3,4 for shape1, and 0,1,2 for shape2) must either match, or either one +// of the two be 1. +// Example: +// shape1 = [9, 7, 6, 1, 2] +// shape2 = [6, 5, 2] +// result_shape = [9, 7, 6, 5, 2] +TORCH_API std::vector GetPromotedShape( + c10::ArrayRef shape1_dims, + c10::ArrayRef shape2_dims); + +TORCH_API Shape +GetPromotedBinaryOpShape(const Shape& shape1, const Shape& shape2); + +TORCH_API std::vector StrSplit(std::string_view text, char delim); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/internal_ops/ltc_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/internal_ops/ltc_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..29c0ebbbe8c4471051f45a7a00e3138c2e54c2f8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/internal_ops/ltc_ops.h @@ -0,0 +1,54 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#include + +namespace torch::lazy { + +class TORCH_API OpKindWrapper { + public: + explicit OpKindWrapper(const char* name) : name_(name) {} + + const OpKind& operator*() const { + return get(); + } + + operator OpKind() const { + return get(); + } + + private: + const OpKind& get() const { + c10::call_once(once_, [this]() { op_kind_ = OpKind::Get(name_); }); + return op_kind_; + } + + const char* name_; + mutable OpKind op_kind_; + mutable c10::once_flag once_; +}; + +const OpKindWrapper ltc_all_to_all("lazy_tensors::all_to_all"); +const OpKindWrapper ltc_cast("lazy_tensors::cast"); +const OpKindWrapper ltc_collective_permute("lazy_tensors::collective_permute"); +const OpKindWrapper ltc_cross_replica_sum("lazy_tensors::cross_replica_sum"); +const OpKindWrapper ltc_device_data("lazy_tensors::device_data"); +const OpKindWrapper ltc_get_dimensions_size( + "lazy_tensors::ltc_get_dimensions_size"); +const OpKindWrapper ltc_moving_average("lazy_tensors::moving_average"); +const OpKindWrapper ltc_nms("lazy_tensors::nms"); +const OpKindWrapper ltc_not_supported("lazy_tensors::not_supported"); +const OpKindWrapper ltc_replication_pad("lazy_tensors::replication_pad"); +const OpKindWrapper ltc_replication_pad_backward( + "lazy_tensors::replication_pad_backward"); +const OpKindWrapper ltc_tensor_data("lazy_tensors::tensor_data"); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir.h new file mode 100644 index 0000000000000000000000000000000000000000..3c096234558d1bfbafe7d4cecf9b2d306480f979 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir.h @@ -0,0 +1,294 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +TORCH_DECLARE_bool(ltc_enable_dynamic_shapes); + +namespace torch::lazy { + +static const hash_t kHashSeed(static_cast(0x5a2d296e9)); + +class Node; +struct Output; +struct Value; + +using NodePtr = std::shared_ptr; + +// The Kind of operation a Node can be associated to. +struct TORCH_API OpKind { + OpKind() = default; + explicit OpKind(c10::Symbol op) : op(op) {} + + bool operator==(const OpKind& rhs) const { + return op == rhs.op; + } + bool operator!=(const OpKind& rhs) const { + return !operator==(rhs); + } + bool operator<(const OpKind& rhs) const { + return c10::unique_t(op) < c10::unique_t(rhs.op); + } + + hash_t hash() const; + + std::string ToString() const { + return op.toQualString(); + } + + // Retrieves an existing operation object, or creates a new one. Operations + // that are specific to lazy tensors, should live within the 'lazy_tensors::' + // namespace. + static OpKind Get(const std::string& name); + + c10::Symbol op; +}; + +inline std::ostream& operator<<(std::ostream& stream, const OpKind& op) { + stream << op.ToString(); + return stream; +} + +using OpList = c10::ArrayRef; + +hash_t OperandHashes( + const OpList& operands, + const hash_t& seed, + bool bakeInSizes); +// A node in the graph. Nodes for operations which require extra data to be +// stored for lowering should inherit from this class and add an operation +// specific member there. For example, a constant might create a new +// NodeConstant class (inheriting from Node) with an extra lazy_tensors::Literal +// field, or a tensor value might create a new NodeTensor with a computation +// client data handle in it. +class TORCH_API Node { + public: + static bool enableDynamicShape(); + + // Creates a new node with the given op name. The op is a unique identifier + // for the operation. The num_outputs tells how many outputs a given operation + // generates. + // + // None leaf node's node_hash does not contains shape information always. + // So we pass in the hash value rather than a function. + Node(OpKind op, size_t num_outputs); + + // Construct node with operands and shapes + Node( + OpKind op, + OpList operands, + std::vector&& shapes, + size_t num_outputs = 1); + + // Construct node with operands and no shape + Node(OpKind op, OpList operands, size_t num_outputs = 1); + + // Construct node with shape and no operands + Node(OpKind op, Shape shape, size_t num_outputs = 1); + + virtual ~Node() = default; + + const OpKind& op() const { + return op_; + } + + size_t num_outputs() const { + return num_outputs_; + } + + // Retrieves the full shape of the IR Node. + virtual c10::ArrayRef shapes() const; + + virtual const Shape& shape(size_t output_index = 0) const; + + // Add the shape computed by the shape_fn + void addComputedShape(const std::function& shape_fn); + + // Compute the shape using the provided shape_fn if not previously cached + Shape computeShape(const std::function& shape_fn); + + virtual const std::vector& operands() const; + + virtual const Output& operand(size_t i) const; + + // Gets operand at index i if index is valid, or kNullOutput otherwise. + virtual const Output& nullable_operand(size_t i) const; + + // Returns the hash of the dag used to look up the compiled graph + virtual hash_t hash() const = 0; + + // Returns the hash of the dag used to for shape caching + virtual hash_t shapeHash() const = 0; + + const MetaData& metadata() const { + return metadata_; + } + + UserMetaData* user_metadata() const { + return user_metadata_.get(); + } + + std::shared_ptr SetUserMetadata( + std::shared_ptr user_meta) { + std::swap(user_metadata_, user_meta); + return user_meta; + } + + virtual std::string ToString() const; + + private: + // The ID of the operation captured by this node. + OpKind op_; + size_t num_outputs_ = 1; + + // The IR specific metadata attached to the IR node. + MetaData metadata_; + // The IR framework user can attach a user defined metadata object deriving + // from UserMetaData. + std::shared_ptr user_metadata_; + + protected: + // Adds node's index output number as operand. + void AddOperand(const NodePtr& node, size_t index = 0); + + std::vector shapes_; + // A node holds a real reference to its operands. + std::vector operands_; + // Outputs do not hold references on the nodes, and neither do the uses, since + // otherwise we get into circular reference counting. + std::vector operands_as_outputs_; +}; + +inline std::ostream& operator<<(std::ostream& stream, const Node& node) { + stream << node.ToString(); + return stream; +} + +// Note: Keep this version of NodeCast for smooth PyTorch/XLA migration, and +// clean up once the migration is done. +template +const T* NodeCast(const Node* node, OpKind op) { + if (op != node->op()) { + return nullptr; + } +#ifdef NDEBUG + return static_cast(node); +#else + return &dynamic_cast(*node); +#endif +} + +template +const T* NodeCast(const Node* node) { + if (T::ClassOpKind() != node->op()) { + return nullptr; + } + // TODO: Some IR classes share the same opkind, such as Mean and MeanDim, so + // static_cast is not safe here. Unless we have opkind unique for each class, + // we have to use dynamic_cast here. + return dynamic_cast(node); +} + +// Represents a specific output produced by a node. Since the output of a node +// can be composed by multiple outputs, the node+index coordinates fully qualify +// each single output. +struct TORCH_API Output { + struct Hasher { + size_t operator()(const Output& output) const; + }; + + Output() = default; + explicit Output(const Node* node, size_t index = 0) + : node(node), index(index) {} + + hash_t hash() const; + hash_t shapeHash() const; + + bool operator==(const Output& rhs) const { + return node == rhs.node && index == rhs.index; + } + + // To compare the operands of to-be-constructed node and to-be-reused node + bool operator==(const Value& rhs) const; + + bool operator!=(const Output& rhs) const { + return !operator==(rhs); + } + + const Shape& shape() const { + return node->shape(index); + } + + std::string ToString() const; + + // The node providing the output. + const Node* node{nullptr}; + // The index in the node's output this output refers to. + size_t index{0}; +}; + +inline std::ostream& operator<<(std::ostream& stream, const Output& output) { + stream << output.ToString(); + return stream; +} + +template +using OutputMap = std::unordered_map; + +// Represents an input/operand for a Node object. +struct TORCH_API Value { + Value() = default; + /* implicit */ Value(NodePtr&& node, size_t index = 0) + : node(std::move(node)), index(index) {} + /* implicit */ Value(const NodePtr& node, size_t index = 0) + : node(node), index(index) {} + + hash_t hash() const; + hash_t shapeHash() const; + + operator bool() const { + return node != nullptr; + } + + operator Output() const { + return Output(node.get(), index); + } + + const Shape& shape() const { + return node->shape(index); + } + + Node* operator->() const { + return node.get(); + } + + NodePtr node; + size_t index = 0; +}; + +} // namespace torch::lazy + +namespace c10 { +// Explicit template instantiation to make ArrayRef work +template class at::ArrayRef; +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_builder.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_builder.h new file mode 100644 index 0000000000000000000000000000000000000000..c1b1c2abc8f342f42fea0e315b2d6762cace2d33 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_builder.h @@ -0,0 +1,153 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +// This file is part of the backend interface. So, ops shouldn't be added or +// removed without due process The exception to this being the view ops which +// will be removed soon pending functionalization + +namespace torch::lazy { + +template +NodePtr ReuseNode(Args&&... args) { + if (FLAGS_torch_lazy_reuse_ir) { + return LookupNodeFromTrieCache(std::forward(args)...); + } + return nullptr; +} + +// Caching an IR node into TrieCache +static inline void CacheNode(NodePtr node) { + if (FLAGS_torch_lazy_reuse_ir) { + TrieCache::Get()->Insert(std::move(node)); + } +} + +template +NodePtr MakeNode(Args&&... args) { + return std::make_shared(std::forward(args)...); +} + +// op is passed in for a more efficient node casting, see the implementation of +// NodeCast +template +NodePtr ReuseOrMakeNode(Args&&... args) { + NodePtr node = ReuseNode(std::forward(args)...); + if (!node) { + node = MakeNode(std::forward(args)...); + CacheNode(node); + } + return node; +} + +struct IrBuilder { + virtual NodePtr MakeDeviceData( + const std::shared_ptr& data) const = 0; + virtual NodePtr MakeScalar( + const at::Scalar& value, + const at::ScalarType& type) const = 0; + virtual NodePtr MakeExpand( + const Value& input0, + const std::vector& size, + const bool& is_scalar_expand) const = 0; + virtual NodePtr MakeCast( + const Value& input0, + const at::ScalarType& dtype, + const std::optional& stype = std::nullopt) const = 0; + virtual NodePtr MakeTensorList(const OpList& inputs) const = 0; + virtual NodePtr MakeGeneric( + const OpKind& op, + const OpList& operands, + const Shape& shape, + const size_t& num_outputs = 1, + const hash_t& hash_seed = static_cast(0x5a2d296e9)) const = 0; + + // dynamic ir nodes + virtual NodePtr MakeSizeNode(const Value& input, size_t dim) const = 0; + virtual NodePtr MakeSizeAdd(const Value& a, const Value& b) const = 0; + virtual NodePtr MakeSizeMul(const Value& a, const Value& b) const = 0; + virtual NodePtr MakeSizeDiv(const Value& a, const Value& b) const = 0; + + virtual ~IrBuilder() = default; +}; + +static inline NodePtr MakeDeviceData(const std::shared_ptr& data) { + return getIrBuilder()->MakeDeviceData(data); +} +static inline NodePtr MakeScalar( + const at::Scalar& value, + const at::ScalarType& type) { + return getIrBuilder()->MakeScalar(value, type); +} +static inline NodePtr MakeExpand( + const Value& input0, + const std::vector& size, + const bool& is_scalar_expand) { + return getIrBuilder()->MakeExpand(input0, size, is_scalar_expand); +} +static inline NodePtr MakeCast( + const Value& input0, + const at::ScalarType& dtype, + const std::optional& stype = std::nullopt) { + return getIrBuilder()->MakeCast(input0, dtype, stype); +} +static inline NodePtr MakeTensorList(const OpList& inputs) { + return getIrBuilder()->MakeTensorList(inputs); +} +static inline NodePtr MakeGeneric( + const OpKind& op, + const OpList& operands, + const Shape& shape, + const size_t& num_outputs = 1, + const hash_t& hash_seed = static_cast(0x5a2d296e9)) { + return getIrBuilder()->MakeGeneric( + op, operands, shape, num_outputs, hash_seed); +} + +// dynamic ir nodes +static inline NodePtr MakeSizeNode(const Value& input, size_t dim) { + return getIrBuilder()->MakeSizeNode(input, dim); +} +static inline NodePtr MakeSizeAdd(const Value& a, const Value& b) { + return getIrBuilder()->MakeSizeAdd(a, b); +} +static inline NodePtr MakeSizeMul(const Value& a, const Value& b) { + return getIrBuilder()->MakeSizeAdd(a, b); +} +static inline NodePtr MakeSizeDiv(const Value& a, const Value& b) { + return getIrBuilder()->MakeSizeDiv(a, b); +} + +inline Value GetSymIntValue(const c10::SymInt& a) { + if (auto ma = a.maybe_as_int()) { + return Value(MakeScalar(*ma, at::kLong), 0); + } else { + return Value( + dynamic_cast(a.toSymNodeImplUnowned()) + ->node_, + 0); + } +} + +// TODO: this should return Value +inline std::vector GetSymIntArrayRefValue(c10::SymIntArrayRef arr) { + std::vector r; + for (const auto& a : arr) { + r.emplace_back(a.guard_int(__FILE__, __LINE__)); + } + return r; +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_dump_util.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_dump_util.h new file mode 100644 index 0000000000000000000000000000000000000000..7a27bd8dbec82daf15acb6ac695a8aba86dac08d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_dump_util.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::lazy { + +class BackendDevice; + +class TORCH_API DumpUtil { + public: + static std::string ToDot(c10::ArrayRef nodes); + + static std::string PostOrderToDot( + c10::ArrayRef post_order, + c10::ArrayRef roots); + + static std::string ToText(c10::ArrayRef nodes); + + static std::string PostOrderToText( + c10::ArrayRef post_order, + c10::ArrayRef roots); + + static std::string ToBackend( + c10::ArrayRef values, + const BackendDevice& device); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_metadata.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_metadata.h new file mode 100644 index 0000000000000000000000000000000000000000..8b913e2342810b322e4470fb01332a98a40d5116 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_metadata.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::lazy { +struct SourceLocation { + std::string file; + std::string function; + int line = -1; +}; + +TORCH_API void EmitShortFrameInfo( + std::ostream& stream, + const std::vector& frames); + +TORCH_API std::ostream& operator<<( + std::ostream& stream, + const std::vector& frames); + +// The base class for user defined metadata which is possible to attach to IR +// nodes. +struct TORCH_API UserMetaData { + virtual ~UserMetaData() = default; +}; + +struct TORCH_API MetaData { + std::string scope; + std::vector frame_info; +}; + +// TODO(whc) is this going to be used outside of in IR decompositions? +// RAII data structure to be used a stack variable to enter a new IR scope. IR +// scope names will appear in the IR and will help identifying the source of the +// single IR nodes. +struct TORCH_API ScopePusher { + explicit ScopePusher(const std::string& name); + ~ScopePusher(); + ScopePusher(ScopePusher&& other) = delete; + ScopePusher(const ScopePusher&) = delete; + ScopePusher& operator=(const ScopePusher&) = delete; + ScopePusher& operator=(ScopePusher&&) = delete; + + static void ResetScopes(); +}; + +TORCH_API MetaData GetMetaDataIfDebugging(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_util.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_util.h new file mode 100644 index 0000000000000000000000000000000000000000..bb2f2420a1028cd6aa1d5b58e456dcf767904e5e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_util.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::lazy { + +class TORCH_API Util { + public: + // Tracks the emission status of the nodes during the post-order generation. + // It helps tracking loops within the computation graphs. + enum EmitStatus { + kNotEmitted, + kEmitting, + kEmitted, + }; + + using EmissionMap = std::unordered_map; + + // Computes the post order from the given node, without using recursion. The + // emission map can be used as saved state, for multiple separate calls to + // this API. The returned post-order can be empty if the node has already been + // emitted inside the emission map. An error is generated if a loop is + // detected. + static std::vector ComputePostOrder( + const Node* node, + EmissionMap* emap); + + static std::vector ComputePostOrder( + c10::ArrayRef nodes, + EmissionMap* emap); + + // Same as above, but computes the post order on the set of nodes specified as + // argument. + static std::vector ComputePostOrder( + c10::ArrayRef nodes); + + // Retrieves the number of nodes within the graph whose sink are passed in the + // nodes argument. + static size_t GetGraphSize(c10::ArrayRef nodes); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/lazy_graph_executor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/lazy_graph_executor.h new file mode 100644 index 0000000000000000000000000000000000000000..f7937602a8f3877b698ec06844bb5445d3e580ea --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/lazy_graph_executor.h @@ -0,0 +1,434 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +class TORCH_API LazyGraphExecutor { + public: + struct DeviceDataInfo : public BackendData::Info { + DeviceDataInfo(int64_t tensor_id, bool read_only) + : tensor_id(tensor_id), read_only(read_only) {} + + int64_t tensor_id = 0; + bool read_only = false; + }; + + // Register a lazy graph executor instance that can be retrieved using Get() + static void Register(LazyGraphExecutor* /*executor*/); + static LazyGraphExecutor* Get(); + + virtual ~LazyGraphExecutor() = default; + + // Override these methods to perform custom tensor registration and + // unregistration Note: It is vital that the parent implementations are also + // called in order for the tensors to show up in the live tensor list + virtual void RegisterTensor(std::shared_ptr data); + virtual void UnregisterTensor(LazyTensor::Data* data); + + // Seed for random generator. + // Override to supply your own DeviceContextArena. + virtual Value GetRngSeed(const BackendDevice& device); + virtual uint64_t GetRunningSeed(const BackendDevice& device); + virtual void SetRngSeed(const BackendDevice& device, uint64_t seed); + + void DeviceBarrier(const BackendDevice& device); + + BackendDataPtr GetDeviceData( + const at::Tensor& tensor, + const BackendDevice& device); + + BackendDataPtr GetDeviceData( + const at::Scalar& value, + at::ScalarType scalar_type, + const BackendDevice& device); + + // Retrieves the set of lazy tensors which are currently live in the system, + // for the given device. If device is nullptr, the live tensors for all + // devices will be returned. Returned tensors are sorted by device as primary + // key, and by unique ID as secondary key. + std::vector GetLiveTensors(const BackendDevice* device); + + // Makes sure that any outstanding IR operation accumulated over live tensors, + // gets turned into device data. If wait is true, the sync operation will be + // run synchronously. The devices argument, if not empty, tells the devices + // which should be partecipating into the replicated computation. + virtual void SyncLiveTensorsGraph( + const BackendDevice* device, + c10::ArrayRef devices, + bool wait); + + // Applies all the pending IR operations queued over the input tensors. All + // the tensors must be on the same device. If wait is true, the sync operation + // will be run synchronously. The devices argument, if not empty, tells the + // devices which should be partecipating into the replicated computation. + void SyncTensorsGraph( + std::vector* tensors, + c10::ArrayRef devices, + bool wait, + bool sync_ltc_data); + + // Marks an execution step, which allows the tensor framework to understand + // the computation boundaries. + // Override to supply your own DeviceContextArena. + virtual void MarkStep(const BackendDevice& device); + + // Waits for all the outstanding operations on all the supplied devices. + // If devices is empty, the wait will happen for all local devices. + void WaitDeviceOps(c10::ArrayRef devices); + + // Retrieves the PyTorch CPU tensors behind the lazy tensors IR operations. + // All the tensors must be on the same device. + std::vector GetTensors(std::vector* tensors); + + size_t IncTrimCounter() const; + + // Dumps the backend specific text of the computation accumulated in the graph + // which is attached the tensors. + std::string DumpBackendComputation(const std::vector& tensors); + + Value GetDeviceDataIrValue( + const at::Scalar& value, + c10::ScalarType type, + const BackendDevice& device); + Value GetIrValueForScalar( + const at::Scalar& value, + c10::ScalarType type, + const BackendDevice& device); + Value GetIrValueForScalar( + const at::Scalar& value, + const BackendDevice& device); + + // TODO: even though this API is currently used **only** in codegen to + // generate real scalar IR values vs scalar tensors, we would like to + // use it in other cases where `GetIrValueForXXXScalar` is used, as well + // In order to do that, we need to untangle the cases where we don't need + // `expand` and where we don't expect a scalar tensor + Value GetIrValueForScalarFromCodegen( + const at::Scalar& value, + const BackendDevice& device); + Value GetIrValueForExpandedScalar( + const at::Scalar& value, + const Shape& shape, + const BackendDevice& device); + + struct CachedComputation { + explicit CachedComputation(ComputationPtr computation) + : computation(std::move(computation)) {} + + ComputationPtr computation; + }; + + using ComputationCache = Cache; + + ComputationCache* GetComputationCache(); + + hash_t GetGraphHash(const std::vector& tensors); + + // Clear the computation cache. + void ClearComputationCache(); + // Remove a specific computation cache entry from its hash. + void RemoveFromComputationCache(const hash_t& hash); + + protected: + // TODO(alanwaketan): Revisit if all of them need to be accessible to + // derived classes. + + struct SyncTensorsConfig { + // Whether we want to force data on the target tensors (hence trimming + // the IR graph above them). + bool force_ltc_data = true; + // Whether when setting the data, the other properties of the tensor + // state should be reset. + bool sync_ltc_data = true; + }; + + struct SyncTensorCollection { + SyncTensorCollection() : hash(0) {} + + SyncTensorsConfig config; + std::vector indices; + hash_t hash; + std::vector unlocker; + BackendDevice device; + }; + + struct PostOrderData { + std::vector post_order; + Util::EmissionMap emission_map; + std::vector parameters_data; + std::vector parameter_sequence; + }; + + // Locking: + // We perform two kinds of operations of tensors, synchronous and + // asynchronous. The ApplyPendingGraph() are synchronous, as we need the + // device data result immediately. Before the synchronous operations can + // start, they need to wait that the pending asynchronous operations have + // completed. Synchronous operations do not hold device locks, since they are + // strictly sequential, dictated by the PyTorch execution order. The + // SyncTensorsGraph() is asynchronous, and returns immediately after having + // scheduled the asynchronous operation. While executing, the asynchronous + // operations will hold locks on all the participating devices (in most common + // cases there will be only one device). + // Since asynchronous operations capture device locks, only one asynchronous + // operation can execute at the same time, on a given device. Tensor + // operations which send data to device do not need to hold any device locks + // while doing so. Only operations which _use_ device data (computations, and + // transfer from server) need to wait for asynchronous operations to complete + // (barrier). + + class DeviceLocker { + public: + explicit DeviceLocker(BackendDevice device) : device_(std::move(device)) {} + + const BackendDevice& device() const { + return device_; + } + + void Lock(); + void Unlock(std::exception_ptr exptr); + void Barrier(); + + private: + void CheckResetException(); + + BackendDevice device_; + std::mutex mutex_; + std::condition_variable cv_; + bool locked_ = false; + std::exception_ptr exptr_; + }; + + class DeviceLockerArena { + public: + static DeviceLockerArena* Get(); + + std::shared_ptr GetLocker(const BackendDevice& device); + + void DeviceBarrier(const BackendDevice& device); + + // Use a set to impose an order on the device locking sequence (ABBA + // prevention). + std::vector LockDevices( + const std::set& devices); + + private: + ExceptionCleanup LockDevice(const BackendDevice& device); + + std::mutex mutex_; + std::map> lockers_; + }; + + class DataCacheArena { + public: + static DataCacheArena* Get(); + + BackendDataPtr GetDeviceData( + const at::Tensor& tensor, + const BackendDevice& device); + + BackendDataPtr GetDeviceData( + const at::Scalar& value, + at::ScalarType scalar_type, + const BackendDevice& device); + + private: + struct TensorHasher { + size_t operator()(const at::Tensor& tensor) const; + }; + struct TensorComparer { + bool operator()(const at::Tensor& tensor1, const at::Tensor& tensor2) + const; + }; + + explicit DataCacheArena(size_t max_cache_size); + + using DataCache = + Cache; + + DataCache* GetDataCache(const BackendDevice& device); + + size_t max_cache_size_ = 0; + std::mutex mutex_; + std::map> device_caches_; + }; + + // The DeviceContextArena holds per device live information and statistics, + // among which the lazy tensors which are currently alive in the system. This + // is used to create computation "barriers" in order to flush pending + // operations and ensure the same computations are created during the training + // loops. + // TODO(alanwaketan): Add a registry such that we don't need to make all + // related methods virtual. + class DeviceContextArena { + protected: + struct DeviceContext { + std::mutex lock; + std::map> tensors_data; + uint64_t seed = 101; + uint64_t running_seed = 101; + Value seed_ir_value; + }; + + public: + static DeviceContextArena* Get(); + virtual ~DeviceContextArena() = default; + + void RegisterTensor(std::shared_ptr data); + void UnregisterTensor(LazyTensor::Data* data); + + std::vector GetLiveTensors(const BackendDevice* device); + + // Overriding it allow derived class to use their own IRs for Value. + virtual Value GetRngSeed(const BackendDevice& device); + uint64_t GetRunningSeed(const BackendDevice& device); + void SetRngSeed(const BackendDevice& device, uint64_t seed); + + void MarkStep(const BackendDevice& device); + + std::vector GetActiveDevices(); + + protected: + DeviceContext* GetDeviceContext(const BackendDevice& device); + + void ForAllDeviceContexts( + const std::function& fn, + const BackendDevice* device); + + // Overriding it allow derived class to use their own conversions. + virtual Value IrValueFromScalar( + const at::Scalar& value, + at::ScalarType scalar_type, + const BackendDevice& device); + + private: + std::vector GetAllDeviceContexts(); + + std::mutex lock_; + std::map device_contexts_; + }; + + struct Async { + Async( + SyncTensorCollection* coll, + std::vector parameters_data, + std::vector tensors_data, + ComputationCache::TypePtr cached_computation); + virtual ~Async() = default; + + void Wait(); + + MultiWait mwait; + std::vector indices; + std::vector unlocker; + std::vector parameters_data; + BackendDevice device; + ComputationCache::TypePtr cached_computation; + std::vector tensors_data; + }; + + void ResetTrimCounter() const; + + // Waits for this SyncTensorCollection's device barrier and acquire the lock. + virtual void TensorCollectionBarrier(SyncTensorCollection* coll); + + // One can override to insert your own profiler. + virtual PostOrderData RunPostOrder( + const std::vector& ir_values, + SyncTensorCollection* coll); + + private: + struct CompilationResult { + BackendDevice device; + size_t emitted_nodes = 0; + ComputationPtr computation; + std::vector parameters_data; + }; + + virtual bool ShouldSyncTensor(const LazyTensorPtr& tensor) const; + + SyncTensorCollection CollectSyncTensors( + const std::vector& tensors, + const SyncTensorsConfig& config); + + std::vector CollectRoots( + const std::vector& tensors, + c10::ArrayRef indices); + + std::vector SetTensorData( + std::vector* tensors, + const SyncTensorsConfig& config, + c10::ArrayRef indices, + const std::vector& tensor_data_vec); + + void ExtractIRAndPrepareTensorData( + std::vector* tensors, + const SyncTensorsConfig& config, + c10::ArrayRef indices, + std::vector& ir_values, + std::vector& tensor_data_vec); + + std::shared_ptr TryRunCachedSync( + std::vector* tensors, + SyncTensorCollection* coll, + PostOrderData* po_data, + const std::vector& tensor_data_vec); + + CompilationResult Compile( + const std::vector& tensors, + c10::ArrayRef devices, + const SyncTensorCollection& coll, + PostOrderData* po_data, + const std::vector& ir_values); + + ComputationCache::TypePtr LookupCachedCompile(const hash_t& hash); + + std::shared_ptr SyncTensorsGraphInternal( + std::vector* tensors, + c10::ArrayRef devices, + const SyncTensorsConfig& config); + + // Schedules the execution of a sync tensors operation in background. The + // asynchronous operation will hold the device locks by capturing the ones + // present within the coll structure. + std::shared_ptr ScheduleSyncTensorsGraph( + SyncTensorCollection* coll, + std::vector parameters_data, + std::vector tensors_data, + ComputationCache::TypePtr cached_computation); + + std::shared_ptr ScheduleSyncTensorsGraph( + std::vector* tensors, + SyncTensorCollection* coll, + std::vector parameters_data, + ComputationCache::TypePtr cached_computation, + const std::vector& tensor_data_vec); + + std::vector GetTensorsFused(std::vector* tensors); + + std::vector FetchTensors( + std::vector* tensors, + c10::ArrayRef tensors_data, + const std::vector* indices); + + // Gathers the device data for all the input tensors, after an + // asynchronous operation. + std::vector GatherTensorsData( + const std::vector& tensors, + c10::ArrayRef indices, + c10::ArrayRef tensors_data); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/metrics.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/metrics.h new file mode 100644 index 0000000000000000000000000000000000000000..a175d9358ce87beb902226c1700403b5c0f70bc5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/metrics.h @@ -0,0 +1,293 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * This file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::lazy { + +struct TORCH_API Sample { + Sample() = default; + Sample(int64_t timestamp_ns, double value) + : timestamp_ns(timestamp_ns), value(value) {} + + int64_t timestamp_ns = 0; + double value = 0; +}; + +using MetricReprFn = std::function; + +// Class used to collect time-stamped numeric samples. The samples are stored in +// a circular buffer whose size can be configured at constructor time. +class TORCH_API MetricData { + public: + // Creates a new MetricData object with the internal circular buffer storing + // max_samples samples. The repr_fn argument allow to specify a function which + // pretty-prints a sample value. + MetricData(MetricReprFn repr_fn, size_t max_samples); + + // Returns the total values of all the samples being posted to this metric. + double Accumulator() const; + + size_t TotalSamples() const; + + void AddSample(int64_t timestamp_ns, double value); + + // Returns a vector with all the current samples, from the oldest to the + // newer. If accumulator is not nullptr, it will receive the current value of + // the metrics' accumulator (the sum of all posted values). If total_samples + // is not nullptr, it will receive the count of the posted values. + std::vector Samples(double* accumulator, size_t* total_samples) const; + + std::string Repr(double value) const { + return repr_fn_(value); + } + + void Reset(); + + bool IsValid() const { + return TotalSamples() > 0; + } + + private: + mutable std::mutex lock_; + MetricReprFn repr_fn_; + size_t count_ = 0; + std::vector samples_; + double accumulator_ = 0.0; +}; + +// Counters are a very lightweight form of metrics which do not need to track +// sample time. +class TORCH_API CounterData { + public: + CounterData() : value_(0) {} + + void AddValue(int64_t value) { + value_ += value; + } + + int64_t Value() const { + return value_; + } + + void Reset() { + value_ = 0; + } + + bool IsValid() const { + return value_ > 0; + } + + private: + std::atomic value_; +}; + +class TORCH_API MetricsArena { + public: + static MetricsArena* Get(); + + void ResetCounters(); + void ResetMetrics(); + + // Registers a new metric in the global arena. + void RegisterMetric( + const std::string& name, + MetricReprFn repr_fn, + size_t max_samples, + std::shared_ptr* data); + + void RegisterCounter( + const std::string& name, + std::shared_ptr* data); + + void ForEachMetric( + const std::function& metric_func); + + void ForEachCounter( + const std::function& + counter_func); + + std::vector GetMetricNames(); + + MetricData* GetMetric(const std::string& name); + + std::vector GetCounterNames(); + + CounterData* GetCounter(const std::string& name); + + private: + std::mutex lock_; + std::map> metrics_; + std::map> counters_; +}; + +// Emits the value in a to_string() conversion. +TORCH_API std::string MetricFnValue(double value); +// Emits the value in a humanized bytes representation. +TORCH_API std::string MetricFnBytes(double value); +// Emits the value in a humanized time representation. The value is expressed in +// nanoseconds EPOCH time. +TORCH_API std::string MetricFnTime(double value); + +// The typical use of a Metric is one in which it gets created either in a +// global scope context: +// static Metric* metric = new Metric("RpcCount"); +// Or within a function scope: +// void MyFunction(...) { +// static Metric* metric = new Metric("RpcCount"); +// ... +// metric->AddSample(ts_nanos, some_value); +// } +class TORCH_API Metric { + public: + explicit Metric( + std::string name, + MetricReprFn repr_fn = MetricFnValue, + size_t max_samples = 0); + + const std::string& Name() const { + return name_; + } + + double Accumulator() const; + + void AddSample(int64_t timestamp_ns, double value); + + void AddSample(double value); + + std::vector Samples(double* accumulator, size_t* total_samples) const; + + std::string Repr(double value) const; + + private: + MetricData* GetData() const; + + std::string name_; + MetricReprFn repr_fn_; + size_t max_samples_; + mutable std::shared_ptr data_ptr_; + mutable std::atomic data_; +}; + +// A Counter is a lightweight form of metric which tracks an integer value which +// can increase or decrease. +// A typical use is as: +// static Counter* counter = new Counter("MyCounter"); +// ... +// counter->AddValue(+1); +class TORCH_API Counter { + public: + explicit Counter(std::string name); + + void AddValue(int64_t value) { + GetData()->AddValue(value); + } + + int64_t Value() const { + return GetData()->Value(); + } + + private: + CounterData* GetData() const; + + std::string name_; + mutable std::shared_ptr data_ptr_; + mutable std::atomic data_; +}; + +#define TORCH_LAZY_COUNTER(name, value) \ + do { \ + static ::torch::lazy::Counter* __counter = \ + new ::torch::lazy::Counter(name); \ + __counter->AddValue(value); \ + } while (0) + +#define TORCH_LAZY_FN_COUNTER(ns) TORCH_LAZY_COUNTER(c10::str(ns, __func__), 1) + +#define TORCH_LAZY_VALUE_METRIC(name, value) \ + do { \ + static ::torch::lazy::Metric* __metric = \ + new ::torch::lazy::Metric(name, torch::lazy::MetricFnValue); \ + __metric->AddSample(value); \ + } while (0) + +// Creates a report with the current metrics statistics. +TORCH_API std::string CreateMetricReport(); + +// Creates a report with the selected metrics statistics. +TORCH_API std::string CreateMetricReport( + const std::vector& counter_names, + const std::vector& metric_names); + +// Returns the currently registered metric names. Note that the list can grow +// since metrics are usually function initialized (they are static function +// variables). +TORCH_API std::vector GetMetricNames(); + +// Retrieves the metric data of a given metric, or nullptr if such metric does +// not exist. +TORCH_API MetricData* GetMetric(const std::string& name); + +// Returns the currently registered counter names. Note that the list can grow +// since counters are usually function initialized (they are static function +// variables). +TORCH_API std::vector GetCounterNames(); + +// Retrieves the counter data of a given counter, or nullptr if such counter +// does not exist. +TORCH_API CounterData* GetCounter(const std::string& name); + +// Retrieves the current EPOCH time in nanoseconds. +TORCH_API int64_t NowNs(); + +// Scope based utility class TORCH_API to measure the time the code takes within +// a given C++ scope. +class TORCH_API TimedSection { + public: + explicit TimedSection(Metric* metric) : metric_(metric), start_(NowNs()) {} + + TimedSection(TimedSection&& other) = delete; + TimedSection(const TimedSection&) = delete; + TimedSection& operator=(const TimedSection&) = delete; + TimedSection& operator=(TimedSection&&) = delete; + ~TimedSection() { + int64_t now = NowNs(); + metric_->AddSample(now, static_cast(now - start_)); + } + + double Elapsed() const { + return 1e-9 * static_cast(NowNs() - start_); + } + + private: + Metric* metric_; + int64_t start_; +}; + +#define TORCH_LAZY_TIMED(name) \ + static torch::lazy::Metric* timed_metric = \ + new torch::lazy::Metric(name, torch::lazy::MetricFnTime); \ + torch::lazy::TimedSection timed_section(timed_metric) + +#define TORCH_LAZY_FN_COUNTER_TIMED_TRACING(ns) \ + TORCH_LAZY_FN_COUNTER(ns); \ + TORCH_LAZY_TIMED("LazyTracing") + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/multi_wait.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/multi_wait.h new file mode 100644 index 0000000000000000000000000000000000000000..c808d3cc6dc6221ea61fee86f86e114e5fdee08c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/multi_wait.h @@ -0,0 +1,65 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * This file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/multi_wait.h + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace torch::lazy { + +// Support waiting for a number of tasks to complete. +class TORCH_API MultiWait { + public: + explicit MultiWait(size_t count) : count_(count) {} + + // Signal the completion of a single task. + void Done(); + + // Waits until at least count (passed as constructor value) completions + // happened. + void Wait(); + + // Same as above, but waits up to wait_seconds. + void Wait(double wait_seconds); + + // Resets the threshold counter for the MultiWait object. The completed count + // is also reset to zero. + void Reset(size_t count); + + // Creates a completer functor which signals the mult wait object once func + // has completed. Handles exceptions by signaling the multi wait with the + // proper status value. This API returns a function which captures a MultiWait + // reference, so care must be taken such that the reference remains valid for + // the whole lifetime of the returned function. + std::function Completer(std::function func); + + // Similar as the above API, but with explicit capture of the MultiWait shared + // pointer. + static std::function Completer( + std::shared_ptr mwait, + std::function func); + + private: + void Complete(const std::function& func); + + std::mutex mutex_; + std::condition_variable cv_; + size_t count_ = 0; + size_t completed_count_ = 0; + std::exception_ptr exptr_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/arithmetic_ir_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/arithmetic_ir_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ff4fe2341d2bd27f1e13d716782ddaced8cb2b79 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/arithmetic_ir_ops.h @@ -0,0 +1,17 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +TORCH_API NodePtr operator+(const Value& node1, const Value& node2); +TORCH_API NodePtr operator-(const Value& node1, const Value& node2); +TORCH_API NodePtr operator*(const Value& node1, const Value& node2); +TORCH_API NodePtr operator/(const Value& node1, const Value& node2); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/utils.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..f900b65fa2280f3cf08fc43942ba815ee3e6c45f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/utils.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#include +#include + +namespace torch::lazy { + +TORCH_API bool StrideIsSupported(c10::ArrayRef stride); + +TORCH_API std::vector GetArrayStridePermutation( + c10::ArrayRef stride); + +TORCH_API Shape MakeDiagonalShape( + const Shape& shape, + int64_t offset, + int64_t dim1, + int64_t dim2); + +TORCH_API Shape +MakePermuteShape(const Shape& source_shape, c10::ArrayRef permutation); + +TORCH_API Shape MakeSelectShape( + const Shape& shape, + int64_t dim, + int64_t start, + int64_t end, + int64_t stride); + +TORCH_API int64_t GetStride(int64_t start, int64_t end, int64_t stride); + +TORCH_API std::vector BuildSqueezedDimensions( + c10::ArrayRef dimensions, + int64_t squeeze_dim); + +TORCH_API std::vector BuildUnsqueezedDimensions( + c10::ArrayRef dimensions, + int64_t squeeze_dim); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/permutation_util.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/permutation_util.h new file mode 100644 index 0000000000000000000000000000000000000000..6a3d5aaa7946fb920ac8d63c3b7524925a4b7b3c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/permutation_util.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +TORCH_API std::vector InversePermutation( + c10::ArrayRef input_permutation); + +TORCH_API bool IsPermutation(c10::ArrayRef permutation); + +// Gathers the input using the order specified by the permutation. For each i, +// output[i] = dimensions[permutation[i]]. The given permutation must be the +// same size as the input. +template +std::vector PermuteDimensions( + c10::ArrayRef permutation, + const Container& dimensions) { + using T = typename Container::value_type; + TORCH_CHECK( + dimensions.size() == permutation.size(), + "Invalid permutation specified. dimensions.size() != permutation.size() (", + dimensions.size(), + " vs. ", + permutation.size(), + ")"); + TORCH_CHECK( + IsPermutation(permutation), + "Invalid permutation specified. Permutation is not permutation"); + std::vector output(dimensions.size()); + for (const auto i : c10::irange(permutation.size())) { + output[i] = dimensions[permutation[i]]; + } + return output; +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape.h new file mode 100644 index 0000000000000000000000000000000000000000..7d4fdb375aa018a79d2df149297705c7cc4bc3f3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape.h @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +TORCH_DECLARE_bool(ltc_enable_symbolic_shapes); + +namespace torch::lazy { + +class TORCH_API Shape { + public: + Shape() = default; + + Shape( + at::ScalarType scalar_type, + c10::ArrayRef sizes, + std::optional> is_symbolic = std::nullopt); + + std::string to_string() const; + + c10::ScalarType scalar_type() const { + return scalar_type_; + } + void set_scalar_type(at::ScalarType value) { + scalar_type_ = value; + } + + int64_t dim() const { + return static_cast(sizes_.size()); + } + c10::ArrayRef sizes() const { + return sizes_; + } + int64_t size(int64_t dim) const { + return sizes_.at(dim); + } + void set_size(int64_t dim, int64_t size) { + sizes_.at(dim) = size; + } + + const std::optional>& is_symbolic() const { + return is_symbolic_; + } + + // Makes a copy with symbolic dims applied + Shape with_symbolic_dims( + std::optional> symbolic_dims) const; + + size_t numel() const; + hash_t hash(bool bakeInSizes) const; + + bool operator==(const Shape& other) const; + + private: + c10::ScalarType scalar_type_{c10::ScalarType::Undefined}; + + // Sizes are the upper bound sizes for a tensor, used by XLA. + std::vector sizes_; + // Stores which dimensions are symbolic + // If nullopt, either it hasn't been initialized or the symbolic + // dimensions are not calculable + std::optional> is_symbolic_ = std::nullopt; +}; + +TORCH_API std::ostream& operator<<(std::ostream& out, const Shape& shape); + +TORCH_API bool symbolicShapeEnabled(); +// Calculate and applies symbolic shapes onto the +// Shape objects passed to result_shapes +TORCH_API void applySymbolicShapesOnLT( + const char* schema_str, + std::vector args, + std::vector& result_shapes); +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape_inference.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape_inference.h new file mode 100644 index 0000000000000000000000000000000000000000..9d1e9a1cd4c0087e9361f29e5c5319586cb05a3f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape_inference.h @@ -0,0 +1,127 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { +// Turn clang-format off, as we rely on the whole signature being on one line +// for codegen. +// clang-format off +TORCH_API std::vector compute_shape__adaptive_avg_pool2d(const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API std::vector compute_shape__adaptive_avg_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API std::vector compute_shape__adaptive_avg_pool3d(const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API std::vector compute_shape__adaptive_avg_pool3d_backward(const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API std::vector compute_shape_abs(const at::Tensor & self); +TORCH_API std::vector compute_shape_arange_out(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step, at::Tensor & out); +TORCH_API std::vector compute_shape_bernoulli(const at::Tensor & self, ::std::optional generator); +TORCH_API std::vector compute_shape_bernoulli(const at::Tensor & self, double p, ::std::optional generator); +TORCH_API std::vector compute_shape_binary_cross_entropy(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +TORCH_API std::vector compute_shape_binary_cross_entropy_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +TORCH_API std::vector compute_shape_cat(at::TensorList tensors, int64_t dim); +TORCH_API std::vector compute_shape_cholesky(const at::Tensor & self, bool upper); +TORCH_API std::vector compute_shape_clamp_min(const at::Tensor & self, const at::Scalar & min); +TORCH_API std::vector compute_shape_clone(const at::Tensor & self, ::std::optional memory_format); +TORCH_API std::vector compute_shape_constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value); +TORCH_API std::vector compute_shape_convolution(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding, int64_t groups); +TORCH_API std::vector compute_shape_convolution_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & weight, at::OptionalIntArrayRef bias_sizes, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding, int64_t groups, ::std::array output_mask); +TORCH_API std::vector compute_shape_embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse); +TORCH_API std::vector compute_shape_embedding_dense_backward(const at::Tensor & grad_output, const at::Tensor & indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq); +TORCH_API std::vector compute_shape_expand(const at::Tensor & self, at::IntArrayRef size, bool implicit); +TORCH_API std::vector compute_shape_expand(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit); +TORCH_API std::vector compute_shape_flip(const at::Tensor & self, at::IntArrayRef dims); +TORCH_API std::vector compute_shape_glu_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim); +TORCH_API std::vector compute_shape_glu_jvp(const at::Tensor & glu, const at::Tensor & x, const at::Tensor & dx, int64_t dim); +TORCH_API std::vector compute_shape_grid_sampler_2d(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +TORCH_API std::vector compute_shape_grid_sampler_2d_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask); +TORCH_API std::vector compute_shape_index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index); +TORCH_API std::vector compute_shape_inverse(const at::Tensor & self); +TORCH_API std::vector compute_shape_isnan(const at::Tensor & self); +TORCH_API std::vector compute_shape_log_sigmoid_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer); +TORCH_API std::vector compute_shape_log_sigmoid_forward(const at::Tensor & self); +TORCH_API std::vector compute_shape_logdet(const at::Tensor & self); +TORCH_API std::vector compute_shape_logical_and(const at::Tensor & self, const at::Tensor & other); +TORCH_API std::vector compute_shape_logical_not(const at::Tensor & self); +TORCH_API std::vector compute_shape_logical_or(const at::Tensor & self, const at::Tensor & other); +TORCH_API std::vector compute_shape_logical_xor(const at::Tensor & self, const at::Tensor & other); +TORCH_API std::vector compute_shape_masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Scalar & value); +TORCH_API std::vector compute_shape_masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Tensor & value); +TORCH_API std::vector compute_shape_max(const at::Tensor & self); +TORCH_API std::vector compute_shape_mean(const at::Tensor & self, ::std::optional dtype); +TORCH_API std::vector compute_shape_min(const at::Tensor & self); +TORCH_API std::vector compute_shape_mv(const at::Tensor & self, const at::Tensor & vec); +TORCH_API std::vector compute_shape_native_batch_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps); +TORCH_API std::vector compute_shape_native_batch_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_invstd, bool train, double eps, ::std::array output_mask); +TORCH_API std::vector compute_shape_native_dropout(const at::Tensor & input, double p, ::std::optional train); +TORCH_API std::vector compute_shape_native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale); +TORCH_API std::vector compute_shape_native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +TORCH_API std::vector compute_shape_native_layer_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, at::IntArrayRef normalized_shape, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, const ::std::optional & bias, ::std::array output_mask); +TORCH_API std::vector compute_shape_new_empty_strided(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API std::vector compute_shape_nll_loss2d_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API std::vector compute_shape_nll_loss2d_forward(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index); +TORCH_API std::vector compute_shape_nonzero(const at::Tensor & self); +TORCH_API std::vector compute_shape_normal_functional(const at::Tensor & self, double mean, double std, ::std::optional generator); +TORCH_API std::vector compute_shape_random(const at::Tensor & self, ::std::optional generator); +TORCH_API std::vector compute_shape_random(const at::Tensor & self, int64_t to, ::std::optional generator); +TORCH_API std::vector compute_shape_random(const at::Tensor & self, int64_t from, ::std::optional to, ::std::optional generator); +TORCH_API std::vector compute_shape_relu(const at::Tensor & self); +TORCH_API std::vector compute_shape_repeat(const at::Tensor & self, at::IntArrayRef repeats); +TORCH_API std::vector compute_shape_slogdet(const at::Tensor & self); +TORCH_API std::vector compute_shape_smooth_l1_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta); +TORCH_API std::vector compute_shape_sort(const at::Tensor & self, int64_t dim, bool descending); +TORCH_API std::vector compute_shape_stack(at::TensorList tensors, int64_t dim); +TORCH_API std::vector compute_shape_std(const at::Tensor & self, bool unbiased); +TORCH_API std::vector compute_shape_std(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim); +TORCH_API std::vector compute_shape_std(const at::Tensor & self, at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); +TORCH_API std::vector compute_shape_sum(const at::Tensor & self, ::std::optional dtype); +TORCH_API std::vector compute_shape__to_copy(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); +TORCH_API std::vector compute_shape_take(const at::Tensor & self, const at::Tensor & index); +TORCH_API std::vector compute_shape_trace(const at::Tensor & self); +TORCH_API std::vector compute_shape_zero(const at::Tensor & self); +TORCH_API std::vector compute_shape_narrow_copy_symint(const at::Tensor & self, int64_t dim, int64_t start, c10::SymInt length); +TORCH_API std::vector compute_shape_hardswish(const at::Tensor & self); +TORCH_API std::vector compute_shape_hardswish_backward(const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API std::vector compute_shape_selu(const at::Tensor & self); +TORCH_API std::vector compute_shape_uniform(const at::Tensor & self, double from, double to, ::std::optional generator); + +// Non-Native ops +TORCH_API std::vector compute_shape_scalar(const at::Scalar& value, const at::ScalarType& type); +TORCH_API std::vector compute_shape_expand(const Output& input0, const std::vector& size, const bool& is_scalar_expand); +TORCH_API std::vector compute_shape_view(const Output& input0, const std::vector& output_sizes); +TORCH_API std::vector compute_shape_cast(const Output& input0, const at::ScalarType& dtype, const ::std::optional& stype); + +// View Ops +// (Now that functionalization pass is used, we should kill these in a later PR) +TORCH_API std::vector compute_shape_as_strided_view_update(const Output& target, const Output& input, const std::vector& size, const std::vector& stride, const int64_t& storage_offset); +TORCH_API std::vector compute_shape_as_strided(const Output& input, const std::vector& size, const std::vector& stride, const int64_t& storage_offset); +TORCH_API std::vector compute_shape_diagonal_view_update(const Output& target, const Output& input, const int64_t& offset, const int64_t& dim1, const int64_t& dim2); +TORCH_API std::vector compute_shape_diagonal(const Output& input, const int64_t& offset, const int64_t& dim1, const int64_t& dim2); +TORCH_API std::vector compute_shape_narrow_view_update(const Output& input, const Output& source, const std::vector& base_indices); +TORCH_API std::vector compute_shape_narrow(const Output& input, const std::vector& base_indices, const std::vector& sizes); +TORCH_API std::vector compute_shape_permute(const Output& input, const std::vector& dims); +TORCH_API std::vector compute_shape_resize(const Output& input, const std::vector& size); +TORCH_API std::vector compute_shape_select_view_update(const Output& target, const Output& source, const int64_t& dim, const int64_t& start, const int64_t& end, const int64_t& stride); +TORCH_API std::vector compute_shape_select(const Output& input, const int64_t& dim, const int64_t& start, const int64_t& end, const int64_t& stride); +TORCH_API std::vector compute_shape_squeeze(const Output& input, const int& dim); +TORCH_API std::vector compute_shape_unsqueeze(const Output& input, const int& dim); + +TORCH_API std::vector compute_shape_select_scatter(const at::Tensor & self, const at::Tensor & src, int64_t dim, int64_t index); +TORCH_API std::vector compute_shape_diagonal_scatter(const at::Tensor & self, const at::Tensor & src, int64_t offset, int64_t dim1, int64_t dim2); +TORCH_API std::vector compute_shape_slice_scatter_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +TORCH_API std::vector compute_shape_as_strided_scatter_symint(const at::Tensor & self, const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); +// clang-format on +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..16a363079fa8fe784f82d8b0dc391264e6edd76f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor.h @@ -0,0 +1,270 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +class TORCH_API SymNodeImpl : public c10::SymNodeImpl { + public: + SymNodeImpl(NodePtr ptr) : node_(std::move(ptr)) {} + NodePtr node_; +}; + +class LazyTensor; +using LazyTensorPtr = c10::intrusive_ptr; + +class TORCH_API LazyTensor : public c10::intrusive_ptr_target { + public: + // This is the core lazy tensor data structure where all the tensor data is + // held. The lazy tensor is nothing more than a shared pointer to a Data + // object. + struct Data { + Data(BackendDataPtr handle, BackendDevice device) + : handle(std::move(handle)), + device(std::move(device)), + unique_id(GetNextTensorId()) {} + Data(Value ir_value, BackendDevice device) + : ir_value(std::move(ir_value)), + device(std::move(device)), + unique_id(GetNextTensorId()) {} + Data(at::Tensor tensor_data, BackendDevice device) + : tensor_data(std::move(tensor_data)), + device(std::move(device)), + unique_id(GetNextTensorId()) {} + // TODO(alanwaketan): Remove this ctor. This is a + // temporary ctor to ease XLA LTC migration. It depends on + // XLA's Functionalization integration. + Data(BackendDevice device) + : device(std::move(device)), unique_id(GetNextTensorId()) {} + + Data(Data&& other) = delete; + Data(const Data&) = delete; + Data& operator=(const Data&) = delete; + Data& operator=(Data&&) = delete; + virtual ~Data(); + + BackendDataPtr handle; + Value ir_value; + std::optional tensor_data; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const BackendDevice device; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int64_t unique_id = 0; + size_t generation = 1; + }; + + static LazyTensorPtr Create( + const at::Tensor& tensor, + const BackendDevice& device); + static LazyTensorPtr Create(Value ir_value, const BackendDevice& device); + static LazyTensorPtr Create(const BackendDataPtr& handle); + static LazyTensorPtr Create(std::shared_ptr data); + + // The default ctor previously created a null LazyTensor (one with no 'data' + // obj). Creating a null LazyTensor is no longer possible, since the same can + // be achieved by creating a null LazyTensorPtr and it is way too confusing to + // have to check both lazy_tensor_ptr && *lazy_tensor_ptr, so everywhere that + // used to rely on a LazyTensor obj with a null Data can now rely on a null + // LazyTensorPtr instead. + LazyTensor() = delete; + LazyTensor(const LazyTensor&) = default; + LazyTensor(LazyTensor&&) noexcept = default; + LazyTensor& operator=(const LazyTensor&) = default; + LazyTensor& operator=(LazyTensor&&) noexcept = default; + + ~LazyTensor() override = default; + + size_t generation() const { + return data()->generation; + } + + // Override it to use your own Shape. + virtual int64_t size(int64_t dim) const; + + // Override it to use your own graph executor. + virtual at::Tensor ToTensor(bool detached); + + void ShallowCopyTo(const LazyTensorPtr& dest) const; + + // Assigns the tensor value to the lazy tensor. + void SetTensor(at::Tensor tensor); + + void UpdateFromTensor(const at::Tensor& tensor, bool sync); + void UpdateFromTensorOut(const at::Tensor& tensor); + void UpdateFromTensorOut(const LazyTensorPtr& tensor); + + const std::shared_ptr& data() const; + + // Override it to use your own type conversion. + virtual at::ScalarType dtype() const; + + MaybeRef shape() const; + + const BackendDevice& GetDevice() const; + int64_t GetUniqueId() const; + + // Fetches the data behind the tensor. If the tensor has a graph defining + // its current value, executes the graph and fetches the data result. + BackendDataPtr GetDataHandle(); + + // Fetches the current value of the data, which can be missing (nullptr) + // in case the tensor has a graph defining its current value, + BackendDataPtr CurrentDataHandle() const; + + void SetDataHandle(BackendDataPtr handle); + void SetDataHandle(BackendDataPtr handle, bool sync); + + // Retrieves the current IR Node, or nullptr in case no active IR Node is + // available. + Value CurrentIrValue() const; + + // Retrieves the IR Node representing this LazyTensor. One will be created if + // missing. Note that although this is a const API, it actually changes the + // internal state of the object. + Value GetIrValue() const; + + void SetIrValue(Value ir_value); + void SetInPlaceIrValue(Value ir_value); + + std::optional CurrentTensorData() const; + + std::vector MakeOutputTensors(const NodePtr& node) const; + + LazyTensorPtr CopyTensorToDevice(const BackendDevice& device); + + // Applies the queue of operations in preparation for using the data. + // Override it to use your own graph executor. + virtual void ApplyPendingGraph(); + + // Override it to set extra information. + virtual void AssignIrValue(Value ir_value) const; + + protected: + explicit LazyTensor(std::shared_ptr data); + + void SetTensorData(at::Tensor tensor_data); + + // We build a graph accumulating operations, but at a given point we + // need to force a rendering, otherwise the graph can grow without control. + // Think: + // for i in range(0, 100000): + // a = a + b + void TryLimitGraphSize(); + + // Override it to instantiate your own data. + virtual Value GetIrValueForTensor( + const at::Tensor& tensor, + const BackendDevice& device) const; + + Value CreateTensorNode(const BackendDataPtr& data, bool read_only) const; + + private: + LazyTensor(const at::Tensor& tensor, const BackendDevice& device); + LazyTensor(Value ir_value, const BackendDevice& device); + explicit LazyTensor(const BackendDataPtr& handle); + + static int64_t GetNextTensorId(); + + std::shared_ptr data_; +}; + +// Utils to convert at::Tensor to LazyTensor, and vice versa. + +// Section 0: c10::Tensorlist ==> lazy::TensorList +// note: GetTensorList is not totally parallel to GetLtcTensor; A TensorList +// skips +// the LazyTensor wrappers, assuming that the list of underlying IR nodes +// is actually more useful for downstream computations. TBD. +TORCH_API torch::lazy::Value GetTensorList(at::ITensorListRef tensors); + +// Section 1: at::Tensor => LazyTensor. +// Extracts the LazyTensor out of an at::Tensor. Returns a null LazyTensor +// if the tensor is not a lazy tensor. +TORCH_API LazyTensorPtr TryGetLtcTensor(const at::Tensor& tensor); + +// Extracts the LazyTensor out of an at::Tensor. Throws an exception +// if the tensor is not a lazy tensor. +TORCH_API LazyTensorPtr GetLtcTensor(const at::Tensor& tensor); + +// Same as above, applied to a list of tensors. +TORCH_API std::vector GetLtcTensors( + c10::ArrayRef tensors); + +// If tensor is a lazy tensor type, returns the LazyTensor embedded within it, +// otherwise creates a new lazy tensor type with tensor as data. +TORCH_API LazyTensorPtr GetOrCreateLtcTensor( + const std::optional& tensor, + const BackendDevice& device); + +TORCH_API LazyTensorPtr GetLtcTensorOrCreateForWrappedNumber( + const at::Tensor& tensor, + const BackendDevice& device); + +// Section 2: LazyTensor => at::Tensor. +// Creates an ATen tensor from an LazyTensor. +TORCH_API at::Tensor CreateAtenFromLtcTensor(const LazyTensorPtr& ltc_tensor); +TORCH_API at::Tensor CreateAtenFromLtcTensor(LazyTensor&& ltc_tensor); + +// Note [Lazy Tensor Functionalization] +// The functionalization pass is implemented by wrapping all TensorImpl +// objects in C++ with an extra FunctionalTensorWrapper object, +// that knows how to perform functionalization +// +// Certain functions in the aten API serve as entry/exit points for +// functionalization, where we need to perform the wrapping/unwrapping: +// - aten::to.device +// - aten::empty + +// Given a non-lazy tensor, this function creates a lazy tensor on the specified +// (lazy) device. The functionalize_output determines whether or not we should +// wrap the output in a "functional wrapper". +// +// How do you know whether to pass true/false for functionalize_output? +// +// Case 1: nonlazy -> lazy +// If you're implementing a function that takes in nonlazy tensors and returns +// lazy tensors, then you should think of that function as an "entrypoint" to +// functionalization, and use functionalize_output=true Examples include: +// - factory functions (the LTC kernel for at::empty) +// - CPU -> Lazy device conversions (the LTC kernel for at::to_device) +// +// Case 2: lazy -> lazy +// If you're implementing a function that takes in lazy tensors and returns +// lazy tensors, +// **but** requires creating lazy tensors internally, +// then you can assume that the current function is running inside of some +// outer context where functionalization is already running, that will take +// care of doing the wrapping for you, and use functionalize_output=true +// Examples include: +// - CPU fallback (takes in lazy tensors, converts to cpu, calls kernel, +// converts returns back to lazy tensors). +TORCH_API at::Tensor to_lazy_tensor( + const at::Tensor& self, + const c10::TensorOptions& options, + at::Device device, + bool non_blocking, + bool functionalize_output); + +template +auto TupleAtenFromLtcTensorsImpl( + const std::vector& tensors, + std::index_sequence /*unused*/) { + return std::make_tuple(CreateAtenFromLtcTensor(tensors[Indices])...); +} + +template +auto TupleAtenFromLtcTensors(const std::vector& tensors) { + return TupleAtenFromLtcTensorsImpl(tensors, std::make_index_sequence{}); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_impl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..161008918c8ccb22011e0f13ef2cae0e16f5b133 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_impl.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +// Tensor implementation class used to be fed to the at::Tensor. +// Its scope is just to handle an LazyTensor. +class TORCH_API LTCTensorImpl final : public c10::TensorImpl { + public: + explicit LTCTensorImpl(const LazyTensorPtr& tensor); + explicit LTCTensorImpl(const LazyTensor& tensor); + explicit LTCTensorImpl(LazyTensor&& tensor); + + LazyTensorPtr tensor() { + return tensor_; + } + + void set_tensor(const LazyTensorPtr& lazy_tensor); + + void force_refresh_sizes() { + generation_ = 0; + } + + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override; + + c10::intrusive_ptr shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override; + + void shallow_copy_from(const c10::intrusive_ptr& impl) override; + + at::IntArrayRef sizes_custom() const override; + at::IntArrayRef strides_custom() const override; + int64_t numel_custom() const override; + int64_t storage_offset_custom() const override; + int64_t dim_custom() const override; + bool is_strides_like_custom(at::MemoryFormat memory_format) const override; + c10::SymBool sym_is_non_overlapping_and_dense_custom() const override; + + c10::SymBool sym_is_contiguous_custom( + at::MemoryFormat memory_format) const override; + c10::SymIntArrayRef sym_sizes_custom() const override; + c10::SymIntArrayRef sym_strides_custom() const override; + c10::SymInt sym_numel_custom() const override; + + private: + void setup_size_properties(); + + LazyTensorPtr tensor_; + mutable std::optional> sym_sizes_; + size_t generation_{0}; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_util.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_util.h new file mode 100644 index 0000000000000000000000000000000000000000..e47484f16265b9675645aa947f06d5cdf59d54cf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_util.h @@ -0,0 +1,81 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include + +namespace torch::lazy { + +TORCH_API std::vector ComputeArrayStrides( + c10::ArrayRef sizes); + +TORCH_API std::vector DataHandlesToTensors( + c10::ArrayRef data_handles, + at::ScalarType dest_element_type); + +// Uploads an ATEN tensor data to the device and fetches the corresponding +// device data handle. +TORCH_API BackendDataPtr +TensorToDataHandle(const at::Tensor& tensor, const BackendDevice& device); + +// Retrieves the device data handles by parallel uploading data onto the +// corresponding devices. +TORCH_API std::vector CreateTensorsData( + const std::vector& tensors, + const std::vector& devices); + +// Makes a deep copy of an ATEN tensor. +inline at::Tensor CopyTensor(const at::Tensor& ref) { + return ref.to(ref.options(), /*non_blocking=*/false, /*copy=*/true); +} + +// Same as above, with an additional cast. +inline at::Tensor CopyTensor( + const at::Tensor& ref, + at::ScalarType dest_type, + bool copy = true) { + return ref.to(ref.options().dtype(dest_type), /*non_blocking=*/false, copy); +} + +template +T OptionalOr(const std::optional& value, T defval) { + return value ? static_cast(*value) : defval; +} + +// Unwraps tensor to target dtype if it's a wrapped number. +inline at::Tensor UnwrapNumber(const at::Tensor& tensor, at::ScalarType dtype) { + return tensor.unsafeGetTensorImpl()->is_wrapped_number() ? tensor.to(dtype) + : tensor; +} + +template +at::Scalar MakeIntScalar(T value) { + return at::Scalar(static_cast(value)); +} + +// Routing values to device data maximizes the changes for compilation cache +// hits, but it can prevent the compiler to perform optimizations. So tensor +// values which are within a given set, are routed to constant scalars if this +// API returns true. +TORCH_API bool IsSpecialScalar(const at::Scalar& value); + +// Note: returns a reference instead of a fresh tensor to avoid refcount bumps. +inline const at::Tensor& maybe_unwrap_functional(const at::Tensor& tensor) { + if (at::functionalization::impl::isFunctionalTensor(tensor)) { + return at::functionalization::impl::unsafeGetFunctionalWrapper(tensor) + ->value(); + } else { + return tensor; + } +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/thread_pool.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/thread_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..ac54c00f81a1bc0b7d47ce5d6402b98f1007f104 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/thread_pool.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * This file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h + */ + +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class TORCH_API Completion { + public: + class Data; + + explicit Completion(std::shared_ptr data); + + ~Completion(); + + void Wait(); + + private: + std::shared_ptr data_; +}; + +// Schedules a closure which might wait for IO or other events/conditions. +TORCH_API void ScheduleIoClosure(std::function closure); +TORCH_API Completion +ScheduleIoClosureWithCompletion(std::function closure); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/trie.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/trie.h new file mode 100644 index 0000000000000000000000000000000000000000..ca5fc4645c2e69fd3894c382a7fcf47dc64ff398 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/trie.h @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::lazy { + +struct TORCH_API TrieNode { + static size_t GetNextUniqueId() { + static thread_local size_t id_generator = 0; + return id_generator++; + } + + size_t unique_id; + size_t hit_counter; + NodePtr ir_node; + std::list> successors; + + TrieNode() : unique_id(GetNextUniqueId()), hit_counter(0), ir_node(nullptr) {} + explicit TrieNode(NodePtr node) + : unique_id(GetNextUniqueId()), + hit_counter(0), + ir_node(std::move(node)) {} +}; + +class TORCH_API TrieCache { + public: + static TrieCache* Get(); + + TrieNode* Current() const; + // Take an iterator as the input because we want to move the corresponding + // node in the successor list to achieve a LRU caching effect + void SetCurrent(std::list>::iterator& iter); + // Used in MarkStep to indicate the end of one tracing + void ResetCurrent(); + + // Create a new TrieNode for ir_node and insert into the TrieCache + void Insert(NodePtr ir_node); + + // Clear all TrieCache nodes + // TODO: Because we don't expect user to explicitly call this function via + // a Python API, we may need to introduce a threshold on the size of the cache + // to avoid holding tensors for too long. + void Clear(); + + void DumpToDotFile(const std::string& file_name); + + private: + TrieCache(); + + std::shared_ptr root_; + TrieNode* current_; +}; + +template +NodePtr LookupNodeFromTrieCache(Args&&... args) { + auto& successors = TrieCache::Get()->Current()->successors; + for (auto it = successors.begin(); it != successors.end(); it++) { + NodePtr ir_node = (*it)->ir_node; + const T* concrete_node = NodeCast(ir_node.get()); + if (concrete_node && + concrete_node->CanBeReused(std::forward(args)...)) { + TORCH_LAZY_COUNTER( + "IrNodeReused_" + c10::demangle((typeid(T).name())), 1); + (*it)->hit_counter++; + TrieCache::Get()->SetCurrent(it); + return ir_node; + } + } + return nullptr; +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/unique.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/unique.h new file mode 100644 index 0000000000000000000000000000000000000000..718cac504751dc9b08fc72d69fcf91dbfbe77376 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/unique.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * Unique in this file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/unique.h + */ + +#pragma once + +#include + +#include +#include + +namespace torch::lazy { + +// Helper class to allow tracking zero or more things, which should be forcibly +// be one only thing. +template > +class Unique { + public: + std::pair set(const T& value) { + if (value_) { + TORCH_CHECK(C()(*value_, value), "'", *value_, "' vs '", value); + return std::pair(false, *value_); + } + value_ = value; + return std::pair(true, *value_); + } + + operator bool() const { + return value_.has_value(); + } + operator const T&() const { + return *value_; + } + const T& operator*() const { + return *value_; + } + const T* operator->() const { + return value_.operator->(); + } + + std::set AsSet() const { + std::set vset; + if (value_.has_value()) { + vset.insert(*value_); + } + return vset; + } + + private: + std::optional value_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/util.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/util.h new file mode 100644 index 0000000000000000000000000000000000000000..4324148de300340fa58151cc73ee6032f1f530ae --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/util.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * Most of the utils in this file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/util.h + */ + +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::lazy { + +// Similar to c10::scope_exit but with a status. +// TODO(alanwaketan): Consolidate it with c10::scope_exit. +template +class Cleanup { + public: + using StatusType = T; + + explicit Cleanup(std::function&& func) + : func_(std::move(func)) {} + Cleanup(Cleanup&& ref) noexcept + : func_(std::move(ref.func_)), status_(std::move(ref.status_)) {} + Cleanup(const Cleanup&) = delete; + + ~Cleanup() { + if (func_ != nullptr) { + func_(std::move(status_)); + } + } + + Cleanup& operator=(const Cleanup&) = delete; + + Cleanup& operator=(Cleanup&& ref) noexcept { + if (this != &ref) { + func_ = std::move(ref.func_); + status_ = std::move(ref.status_); + } + return *this; + } + + void Release() { + func_ = nullptr; + } + + void SetStatus(StatusType&& status) { + status_ = std::move(status); + } + + const StatusType& GetStatus() const { + return status_; + } + + private: + std::function func_; + StatusType status_; +}; + +using ExceptionCleanup = Cleanup; + +// Allows APIs which might return const references and values, to not be forced +// to return values in the signature. +// TODO(alanwaketan): This is clever, but is there really no std or c10 +// supports? Needs more investigations. +template +class MaybeRef { + public: + /* implicit */ MaybeRef(const T& ref) : ref_(ref) {} + /* implicit */ MaybeRef(T&& value) + : storage_(std::move(value)), ref_(*storage_) {} + + const T& Get() const { + return ref_; + } + const T& operator*() const { + return Get(); + } + operator const T&() const { + return Get(); + } + + bool IsStored() const { + return storage_.has_value(); + } + + private: + std::optional storage_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const T& ref_; +}; + +template +std::vector Iota(size_t size, T init = 0, T incr = 1) { + std::vector result(size); + T value = init; + for (size_t i = 0; i < size; ++i, value += incr) { + result[i] = value; + } + return result; +} + +template +std::vector ToVector(const S& input) { + return std::vector(input.begin(), input.end()); +} + +template +std::optional> ToOptionalVector( + c10::OptionalArrayRef arrayRef) { + if (arrayRef) { + return arrayRef->vec(); + } + return std::nullopt; +} + +template +std::underlying_type_t GetEnumValue(T value) { + return static_cast>(value); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyIr.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyIr.h new file mode 100644 index 0000000000000000000000000000000000000000..dfd6a881958c2d7eb0cbbe5006d4302669450d4d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyIr.h @@ -0,0 +1,10312 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// This file contains autogenerated LazyTensor IR nodes +#include +#include +#include +#include +#include +#include +#include +#include "torch/csrc/lazy/ts_backend/ts_node.h" + +namespace torch { +namespace lazy { +using at::operator<<; + +// kNullValue is used to contribute a static hash value any time +// a node has an Optional input that is nullopt. It is important +// to differentiate between HASH(std::nullopt, something) and HASH(something, std::nullopt), +// and using kNullValue in the hash function in the order of arguments +// serves this purpose. +static const torch::lazy::Value kNullValue = torch::lazy::Value(); + +class AdaptiveAvgPool2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_adaptive_avg_pool2d); + } + + AdaptiveAvgPool2d(const torch::lazy::Value& self, const ::std::vector& output_size, std::vector&& shapes) + : TsNode( + AdaptiveAvgPool2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size)), + output_size(output_size) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& output_size) const { + size_t i = 0; + return (operand(i++) == self && + this->output_size == output_size); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + + torch::lazy::TSOpVector _adaptive_avg_pool2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_adaptive_avg_pool2d_out.size(), 1); + + return _adaptive_avg_pool2d_out; + + } + + + ::std::vector output_size; + + +}; + +class AdaptiveAvgPool2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_adaptive_avg_pool2d_backward); + } + + AdaptiveAvgPool2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + AdaptiveAvgPool2dBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector _adaptive_avg_pool2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_adaptive_avg_pool2d_backward_out.size(), 1); + + return _adaptive_avg_pool2d_backward_out; + + } + + + + + +}; + +class LogSoftmax : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_log_softmax); + } + + LogSoftmax(const torch::lazy::Value& self, const int64_t& dim, const bool& half_to_float, std::vector&& shapes) + : TsNode( + LogSoftmax::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, half_to_float)), + dim(dim), + half_to_float(half_to_float) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", half_to_float=" << half_to_float; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const bool& half_to_float) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->half_to_float == half_to_float); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("half_to_float", half_to_float); + + torch::lazy::TSOpVector _log_softmax_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_log_softmax_out.size(), 1); + + return _log_softmax_out; + + } + + + int64_t dim; + bool half_to_float; + + +}; + +class LogSoftmaxBackwardData : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_log_softmax_backward_data); + } + + LogSoftmaxBackwardData(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype, std::vector&& shapes) + : TsNode( + LogSoftmaxBackwardData::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, input_dtype)), + dim(dim), + input_dtype(input_dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", input_dtype=" << input_dtype; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output && + this->dim == dim && + this->input_dtype == input_dtype); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("input_dtype", input_dtype); + + torch::lazy::TSOpVector _log_softmax_backward_data_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_log_softmax_backward_data_out.size(), 1); + + return _log_softmax_backward_data_out; + + } + + + int64_t dim; + at::ScalarType input_dtype; + + +}; + +class ReshapeAliasCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_reshape_alias_copy); + } + + ReshapeAliasCopy(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride, std::vector&& shapes) + : TsNode( + ReshapeAliasCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, stride)), + size(size), + stride(stride) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", stride=" << stride; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride) const { + size_t i = 0; + return (operand(i++) == self && + this->size == size && + this->stride == stride); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + arguments.emplace_back("stride", stride); + + torch::lazy::TSOpVector _reshape_alias_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_reshape_alias_copy_out.size(), 1); + + return _reshape_alias_copy_out; + + } + + + ::std::vector size; + ::std::vector stride; + + +}; + +class Softmax : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_softmax); + } + + Softmax(const torch::lazy::Value& self, const int64_t& dim, const bool& half_to_float, std::vector&& shapes) + : TsNode( + Softmax::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, half_to_float)), + dim(dim), + half_to_float(half_to_float) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", half_to_float=" << half_to_float; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const bool& half_to_float) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->half_to_float == half_to_float); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("half_to_float", half_to_float); + + torch::lazy::TSOpVector _softmax_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_softmax_out.size(), 1); + + return _softmax_out; + + } + + + int64_t dim; + bool half_to_float; + + +}; + +class SoftmaxBackwardData : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_softmax_backward_data); + } + + SoftmaxBackwardData(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype, std::vector&& shapes) + : TsNode( + SoftmaxBackwardData::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, input_dtype)), + dim(dim), + input_dtype(input_dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", input_dtype=" << input_dtype; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output && + this->dim == dim && + this->input_dtype == input_dtype); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("input_dtype", input_dtype); + + torch::lazy::TSOpVector _softmax_backward_data_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_softmax_backward_data_out.size(), 1); + + return _softmax_backward_data_out; + + } + + + int64_t dim; + at::ScalarType input_dtype; + + +}; + +class Abs : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::abs); + } + + Abs(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Abs::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector abs_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(abs_out.size(), 1); + + return abs_out; + + } + + + + + +}; + +class AddTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::add); + } + + AddTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + AddTensor::ClassOpKind(), + OpList{self, other, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector add_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(add_out.size(), 1); + + return add_out; + + } + + + + + +}; + +class Addcdiv : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::addcdiv); + } + + Addcdiv(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + Addcdiv::ClassOpKind(), + OpList{self, tensor1, tensor2, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == tensor1 && + operand(i++) == tensor2 && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("value", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector addcdiv_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(addcdiv_out.size(), 1); + + return addcdiv_out; + + } + + + + + +}; + +class Addcmul : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::addcmul); + } + + Addcmul(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + Addcmul::ClassOpKind(), + OpList{self, tensor1, tensor2, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == tensor1 && + operand(i++) == tensor2 && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("value", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector addcmul_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(addcmul_out.size(), 1); + + return addcmul_out; + + } + + + + + +}; + +class Addmm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::addmm); + } + + Addmm(const torch::lazy::Value& self, const torch::lazy::Value& mat1, const torch::lazy::Value& mat2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + Addmm::ClassOpKind(), + OpList{self, mat1, mat2, beta, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mat1, const torch::lazy::Value& mat2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mat1 && + operand(i++) == mat2 && + operand(i++) == beta && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(2); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("beta", loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector addmm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(addmm_out.size(), 1); + + return addmm_out; + + } + + + + + +}; + +class AliasCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::alias_copy); + } + + AliasCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + AliasCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector alias_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(alias_copy_out.size(), 1); + + return alias_copy_out; + + } + + + + + +}; + +class All : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::all); + } + + All(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + All::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector all_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(all_out.size(), 1); + + return all_out; + + } + + + + + +}; + +class Any : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::any); + } + + Any(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Any::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector any_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(any_out.size(), 1); + + return any_out; + + } + + + + + +}; + +class ArangeStartOut : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::arange); + } + + ArangeStartOut(const torch::lazy::Value& start, const torch::lazy::Value& end, const torch::lazy::Value& step, const torch::lazy::Value& out, std::vector&& shapes) + : TsNode( + ArangeStartOut::ClassOpKind(), + OpList{start, end, step, out}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& start, const torch::lazy::Value& end, const torch::lazy::Value& step, const torch::lazy::Value& out) const { + size_t i = 0; + return (operand(i++) == start && + operand(i++) == end && + operand(i++) == step && + operand(i++) == out); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("out", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector arange_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(arange_out.size(), 1); + + return arange_out; + + } + + + + + +}; + +class AsStridedCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::as_strided_copy); + } + + AsStridedCopy(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset, std::vector&& shapes) + : TsNode( + AsStridedCopy::ClassOpKind(), + OpList{self, storage_offset.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, stride)), + size(size), + stride(stride) + { + has_storage_offset = !!storage_offset; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", stride=" << stride; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == storage_offset.value_or(kNullValue) && + this->size == size && + this->stride == stride); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + arguments.emplace_back("stride", stride); + arguments.emplace_back(has_storage_offset ? loctx->GetOutputOp(operand(i++)) : nullptr); + + torch::lazy::TSOpVector as_strided_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(as_strided_copy_out.size(), 1); + + return as_strided_copy_out; + + } + + + ::std::vector size; + ::std::vector stride; + bool has_storage_offset: 1; + +}; + +class AsStridedScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::as_strided_scatter); + } + + AsStridedScatter(const torch::lazy::Value& self, const torch::lazy::Value& src, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset, std::vector&& shapes) + : TsNode( + AsStridedScatter::ClassOpKind(), + OpList{self, src, storage_offset.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, stride)), + size(size), + stride(stride) + { + has_storage_offset = !!storage_offset; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", stride=" << stride; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& src, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + nullable_operand(i++) == storage_offset.value_or(kNullValue) && + this->size == size && + this->stride == stride); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + arguments.emplace_back("stride", stride); + arguments.emplace_back(has_storage_offset ? loctx->GetOutputOp(operand(i++)) : nullptr); + + torch::lazy::TSOpVector as_strided_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(as_strided_scatter_out.size(), 1); + + return as_strided_scatter_out; + + } + + + ::std::vector size; + ::std::vector stride; + bool has_storage_offset: 1; + +}; + +class AvgPool2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::avg_pool2d); + } + + AvgPool2d(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override, std::vector&& shapes) + : TsNode( + AvgPool2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + ceil_mode(ceil_mode), + count_include_pad(count_include_pad), + divisor_override(divisor_override) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", ceil_mode=" << ceil_mode; + ss << ", count_include_pad=" << count_include_pad; + if (divisor_override.has_value()) { + ss << ", divisor_override=" << divisor_override.value(); + } else { + ss << ", divisor_override=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override) const { + size_t i = 0; + return (operand(i++) == self && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->ceil_mode == ceil_mode && + this->count_include_pad == count_include_pad && + ((!this->divisor_override&&!divisor_override) || (this->divisor_override&&divisor_override && *(this->divisor_override) == *divisor_override))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("ceil_mode", ceil_mode); + arguments.emplace_back("count_include_pad", count_include_pad); + arguments.emplace_back("divisor_override", divisor_override); + + torch::lazy::TSOpVector avg_pool2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(avg_pool2d_out.size(), 1); + + return avg_pool2d_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + bool ceil_mode; + bool count_include_pad; + ::std::optional divisor_override; + + +}; + +class AvgPool2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::avg_pool2d_backward); + } + + AvgPool2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override, std::vector&& shapes) + : TsNode( + AvgPool2dBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + ceil_mode(ceil_mode), + count_include_pad(count_include_pad), + divisor_override(divisor_override) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", ceil_mode=" << ceil_mode; + ss << ", count_include_pad=" << count_include_pad; + if (divisor_override.has_value()) { + ss << ", divisor_override=" << divisor_override.value(); + } else { + ss << ", divisor_override=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->ceil_mode == ceil_mode && + this->count_include_pad == count_include_pad && + ((!this->divisor_override&&!divisor_override) || (this->divisor_override&&divisor_override && *(this->divisor_override) == *divisor_override))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("ceil_mode", ceil_mode); + arguments.emplace_back("count_include_pad", count_include_pad); + arguments.emplace_back("divisor_override", divisor_override); + + torch::lazy::TSOpVector avg_pool2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(avg_pool2d_backward_out.size(), 1); + + return avg_pool2d_backward_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + bool ceil_mode; + bool count_include_pad; + ::std::optional divisor_override; + + +}; + +class Baddbmm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::baddbmm); + } + + Baddbmm(const torch::lazy::Value& self, const torch::lazy::Value& batch1, const torch::lazy::Value& batch2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + Baddbmm::ClassOpKind(), + OpList{self, batch1, batch2, beta, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& batch1, const torch::lazy::Value& batch2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == batch1 && + operand(i++) == batch2 && + operand(i++) == beta && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(2); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("beta", loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector baddbmm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(baddbmm_out.size(), 1); + + return baddbmm_out; + + } + + + + + +}; + +class Bernoulli : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bernoulli); + } + + Bernoulli(const torch::lazy::Value& self, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + Bernoulli::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(generator)), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector bernoulli_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bernoulli_out.size(), 1); + + return bernoulli_out; + + } + + + ::std::optional generator; + + +}; + +class BernoulliP : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bernoulli); + } + + BernoulliP(const torch::lazy::Value& self, const double& p, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + BernoulliP::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(p, generator)), + p(p), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", p=" << p; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const double& p, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->p == p && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("p", p); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector bernoulli_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bernoulli_out.size(), 1); + + return bernoulli_out; + + } + + + double p; + ::std::optional generator; + + +}; + +class BinaryCrossEntropy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::binary_cross_entropy); + } + + BinaryCrossEntropy(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, std::vector&& shapes) + : TsNode( + BinaryCrossEntropy::ClassOpKind(), + OpList{self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction)), + reduction(reduction) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + + torch::lazy::TSOpVector binary_cross_entropy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(binary_cross_entropy_out.size(), 1); + + return binary_cross_entropy_out; + + } + + + int64_t reduction; + bool has_weight: 1; + +}; + +class BinaryCrossEntropyBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::binary_cross_entropy_backward); + } + + BinaryCrossEntropyBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, std::vector&& shapes) + : TsNode( + BinaryCrossEntropyBackward::ClassOpKind(), + OpList{grad_output, self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction)), + reduction(reduction) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + + torch::lazy::TSOpVector binary_cross_entropy_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(binary_cross_entropy_backward_out.size(), 1); + + return binary_cross_entropy_backward_out; + + } + + + int64_t reduction; + bool has_weight: 1; + +}; + +class BitwiseAndTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bitwise_and); + } + + BitwiseAndTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + BitwiseAndTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector bitwise_and_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bitwise_and_out.size(), 1); + + return bitwise_and_out; + + } + + + + + +}; + +class BitwiseOrTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bitwise_or); + } + + BitwiseOrTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + BitwiseOrTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector bitwise_or_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bitwise_or_out.size(), 1); + + return bitwise_or_out; + + } + + + + + +}; + +class Bmm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bmm); + } + + Bmm(const torch::lazy::Value& self, const torch::lazy::Value& mat2, std::vector&& shapes) + : TsNode( + Bmm::ClassOpKind(), + OpList{self, mat2}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mat2) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mat2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector bmm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bmm_out.size(), 1); + + return bmm_out; + + } + + + + + +}; + +class Cat : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::cat); + } + + Cat(const torch::lazy::Value& tensors, const int64_t& dim, std::vector&& shapes) + : TsNode( + Cat::ClassOpKind(), + OpList{tensors}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& tensors, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == tensors && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector cat_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(cat_out.size(), 1); + + return cat_out; + + } + + + int64_t dim; + + +}; + +class Clamp : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::clamp); + } + + Clamp(const torch::lazy::Value& self, const ::std::optional& min, const ::std::optional& max, std::vector&& shapes) + : TsNode( + Clamp::ClassOpKind(), + OpList{self, min.value_or(kNullValue), max.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + has_min = !!min; + has_max = !!max; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& min, const ::std::optional& max) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == min.value_or(kNullValue) && + nullable_operand(i++) == max.value_or(kNullValue)); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_min ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_max ? loctx->GetOutputOp(operand(i++)) : nullptr); + + torch::lazy::TSOpVector clamp_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(clamp_out.size(), 1); + + return clamp_out; + + } + + + + bool has_min: 1; + bool has_max: 1; + +}; + +class ClampMin : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::clamp_min); + } + + ClampMin(const torch::lazy::Value& self, const torch::lazy::Value& min, std::vector&& shapes) + : TsNode( + ClampMin::ClassOpKind(), + OpList{self, min}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& min) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == min); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector clamp_min_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(clamp_min_out.size(), 1); + + return clamp_min_out; + + } + + + + + +}; + +class ConstantPadNd : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::constant_pad_nd); + } + + ConstantPadNd(const torch::lazy::Value& self, const ::std::vector& pad, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + ConstantPadNd::ClassOpKind(), + OpList{self, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(pad)), + pad(pad) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", pad=" << pad; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& pad, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == value && + this->pad == pad); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("pad", pad); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector constant_pad_nd_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(constant_pad_nd_out.size(), 1); + + return constant_pad_nd_out; + + } + + + ::std::vector pad; + + +}; + +class Convolution : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::convolution); + } + + Convolution(const torch::lazy::Value& input, const torch::lazy::Value& weight, const ::std::optional& bias, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups, std::vector&& shapes) + : TsNode( + Convolution::ClassOpKind(), + OpList{input, weight, bias.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(stride, padding, dilation, transposed, output_padding, groups)), + stride(stride), + padding(padding), + dilation(dilation), + transposed(transposed), + output_padding(output_padding), + groups(groups) + { + has_bias = !!bias; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", transposed=" << transposed; + ss << ", output_padding=" << output_padding; + ss << ", groups=" << groups; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const torch::lazy::Value& weight, const ::std::optional& bias, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups) const { + size_t i = 0; + return (operand(i++) == input && + operand(i++) == weight && + nullable_operand(i++) == bias.value_or(kNullValue) && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->transposed == transposed && + this->output_padding == output_padding && + this->groups == groups); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(9); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_bias ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("transposed", transposed); + arguments.emplace_back("output_padding", output_padding); + arguments.emplace_back("groups", groups); + + torch::lazy::TSOpVector convolution_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(convolution_out.size(), 1); + + return convolution_out; + + } + + + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool transposed; + ::std::vector output_padding; + int64_t groups; + bool has_bias: 1; + +}; + +class ConvolutionBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::convolution_backward); + } + + ConvolutionBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& weight, const ::std::optional<::std::vector>& bias_sizes, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + ConvolutionBackward::ClassOpKind(), + OpList{grad_output, input, weight}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(bias_sizes, stride, padding, dilation, transposed, output_padding, groups, output_mask)), + bias_sizes(bias_sizes), + stride(stride), + padding(padding), + dilation(dilation), + transposed(transposed), + output_padding(output_padding), + groups(groups), + output_mask(output_mask) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (bias_sizes.has_value()) { + ss << ", bias_sizes=" << bias_sizes.value(); + } else { + ss << ", bias_sizes=null"; + } + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", transposed=" << transposed; + ss << ", output_padding=" << output_padding; + ss << ", groups=" << groups; + ss << ", output_mask=" << output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& weight, const ::std::optional<::std::vector>& bias_sizes, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == input && + operand(i++) == weight && + ((!this->bias_sizes&&!bias_sizes) || (this->bias_sizes&&bias_sizes && *(this->bias_sizes) == *bias_sizes)) && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->transposed == transposed && + this->output_padding == output_padding && + this->groups == groups && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(11); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("bias_sizes", bias_sizes); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("transposed", transposed); + arguments.emplace_back("output_padding", output_padding); + arguments.emplace_back("groups", groups); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector convolution_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(convolution_backward_out.size(), 3); + + return convolution_backward_out; + + } + + + ::std::optional<::std::vector> bias_sizes; + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool transposed; + ::std::vector output_padding; + int64_t groups; + ::std::vector output_mask; + + +}; + +class Cos : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::cos); + } + + Cos(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Cos::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector cos_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(cos_out.size(), 1); + + return cos_out; + + } + + + + + +}; + +class Cumsum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::cumsum); + } + + Cumsum(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + Cumsum::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, dtype)), + dim(dim), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector cumsum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(cumsum_out.size(), 1); + + return cumsum_out; + + } + + + int64_t dim; + ::std::optional dtype; + + +}; + +class DetachCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::detach_copy); + } + + DetachCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + DetachCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector detach_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(detach_copy_out.size(), 1); + + return detach_copy_out; + + } + + + + + +}; + +class DiagonalCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::diagonal_copy); + } + + DiagonalCopy(const torch::lazy::Value& self, const int64_t& offset, const int64_t& dim1, const int64_t& dim2, std::vector&& shapes) + : TsNode( + DiagonalCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(offset, dim1, dim2)), + offset(offset), + dim1(dim1), + dim2(dim2) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", offset=" << offset; + ss << ", dim1=" << dim1; + ss << ", dim2=" << dim2; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& offset, const int64_t& dim1, const int64_t& dim2) const { + size_t i = 0; + return (operand(i++) == self && + this->offset == offset && + this->dim1 == dim1 && + this->dim2 == dim2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("offset", offset); + arguments.emplace_back("dim1", dim1); + arguments.emplace_back("dim2", dim2); + + torch::lazy::TSOpVector diagonal_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(diagonal_copy_out.size(), 1); + + return diagonal_copy_out; + + } + + + int64_t offset; + int64_t dim1; + int64_t dim2; + + +}; + +class DiagonalScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::diagonal_scatter); + } + + DiagonalScatter(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& offset, const int64_t& dim1, const int64_t& dim2, std::vector&& shapes) + : TsNode( + DiagonalScatter::ClassOpKind(), + OpList{self, src}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(offset, dim1, dim2)), + offset(offset), + dim1(dim1), + dim2(dim2) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", offset=" << offset; + ss << ", dim1=" << dim1; + ss << ", dim2=" << dim2; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& offset, const int64_t& dim1, const int64_t& dim2) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + this->offset == offset && + this->dim1 == dim1 && + this->dim2 == dim2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("offset", offset); + arguments.emplace_back("dim1", dim1); + arguments.emplace_back("dim2", dim2); + + torch::lazy::TSOpVector diagonal_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(diagonal_scatter_out.size(), 1); + + return diagonal_scatter_out; + + } + + + int64_t offset; + int64_t dim1; + int64_t dim2; + + +}; + +class DivTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::div); + } + + DivTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + DivTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector div_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(div_out.size(), 1); + + return div_out; + + } + + + + + +}; + +class DivTensorMode : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::div); + } + + DivTensorMode(const torch::lazy::Value& self, const torch::lazy::Value& other, const ::std::optional& rounding_mode, std::vector&& shapes) + : TsNode( + DivTensorMode::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(rounding_mode)), + rounding_mode(rounding_mode.has_value() ? ::std::make_optional(std::string(*rounding_mode)) : ::std::nullopt) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (rounding_mode.has_value()) { + ss << ", rounding_mode=" << rounding_mode.value(); + } else { + ss << ", rounding_mode=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other, const ::std::optional& rounding_mode) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other && + ((!this->rounding_mode&&!rounding_mode) || (this->rounding_mode&&rounding_mode && *(this->rounding_mode) == *rounding_mode))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("rounding_mode", rounding_mode); + torch::lazy::TSOpVector div_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(div_out.size(), 1); + + return div_out; + + } + + + ::std::optional rounding_mode; + + +}; + +class Elu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::elu); + } + + Elu(const torch::lazy::Value& self, const torch::lazy::Value& alpha, const torch::lazy::Value& scale, const torch::lazy::Value& input_scale, std::vector&& shapes) + : TsNode( + Elu::ClassOpKind(), + OpList{self, alpha, scale, input_scale}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& alpha, const torch::lazy::Value& scale, const torch::lazy::Value& input_scale) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == alpha && + operand(i++) == scale && + operand(i++) == input_scale); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector elu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(elu_out.size(), 1); + + return elu_out; + + } + + + + + +}; + +class EluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::elu_backward); + } + + EluBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& alpha, const torch::lazy::Value& scale, const torch::lazy::Value& input_scale, const bool& is_result, const torch::lazy::Value& self_or_result, std::vector&& shapes) + : TsNode( + EluBackward::ClassOpKind(), + OpList{grad_output, alpha, scale, input_scale, self_or_result}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(is_result)), + is_result(is_result) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", is_result=" << is_result; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& alpha, const torch::lazy::Value& scale, const torch::lazy::Value& input_scale, const bool& is_result, const torch::lazy::Value& self_or_result) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == alpha && + operand(i++) == scale && + operand(i++) == input_scale && + operand(i++) == self_or_result && + this->is_result == is_result); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("is_result", is_result); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector elu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(elu_backward_out.size(), 1); + + return elu_backward_out; + + } + + + bool is_result; + + +}; + +class Embedding : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::embedding); + } + + Embedding(const torch::lazy::Value& weight, const torch::lazy::Value& indices, const int64_t& padding_idx, const bool& scale_grad_by_freq, const bool& sparse, std::vector&& shapes) + : TsNode( + Embedding::ClassOpKind(), + OpList{weight, indices}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(padding_idx, scale_grad_by_freq, sparse)), + padding_idx(padding_idx), + scale_grad_by_freq(scale_grad_by_freq), + sparse(sparse) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", padding_idx=" << padding_idx; + ss << ", scale_grad_by_freq=" << scale_grad_by_freq; + ss << ", sparse=" << sparse; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& weight, const torch::lazy::Value& indices, const int64_t& padding_idx, const bool& scale_grad_by_freq, const bool& sparse) const { + size_t i = 0; + return (operand(i++) == weight && + operand(i++) == indices && + this->padding_idx == padding_idx && + this->scale_grad_by_freq == scale_grad_by_freq && + this->sparse == sparse); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("padding_idx", padding_idx); + arguments.emplace_back("scale_grad_by_freq", scale_grad_by_freq); + arguments.emplace_back("sparse", sparse); + + torch::lazy::TSOpVector embedding_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(embedding_out.size(), 1); + + return embedding_out; + + } + + + int64_t padding_idx; + bool scale_grad_by_freq; + bool sparse; + + +}; + +class EmbeddingDenseBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::embedding_dense_backward); + } + + EmbeddingDenseBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& indices, const int64_t& num_weights, const int64_t& padding_idx, const bool& scale_grad_by_freq, std::vector&& shapes) + : TsNode( + EmbeddingDenseBackward::ClassOpKind(), + OpList{grad_output, indices}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(num_weights, padding_idx, scale_grad_by_freq)), + num_weights(num_weights), + padding_idx(padding_idx), + scale_grad_by_freq(scale_grad_by_freq) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", num_weights=" << num_weights; + ss << ", padding_idx=" << padding_idx; + ss << ", scale_grad_by_freq=" << scale_grad_by_freq; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& indices, const int64_t& num_weights, const int64_t& padding_idx, const bool& scale_grad_by_freq) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == indices && + this->num_weights == num_weights && + this->padding_idx == padding_idx && + this->scale_grad_by_freq == scale_grad_by_freq); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("num_weights", num_weights); + arguments.emplace_back("padding_idx", padding_idx); + arguments.emplace_back("scale_grad_by_freq", scale_grad_by_freq); + + torch::lazy::TSOpVector embedding_dense_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(embedding_dense_backward_out.size(), 1); + + return embedding_dense_backward_out; + + } + + + int64_t num_weights; + int64_t padding_idx; + bool scale_grad_by_freq; + + +}; + +class EqScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::eq); + } + + EqScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + EqScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector eq_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(eq_out.size(), 1); + + return eq_out; + + } + + + + + +}; + +class EqTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::eq); + } + + EqTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + EqTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector eq_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(eq_out.size(), 1); + + return eq_out; + + } + + + + + +}; + +class Exp : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::exp); + } + + Exp(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Exp::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector exp_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(exp_out.size(), 1); + + return exp_out; + + } + + + + + +}; + +class ExpandCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::expand_copy); + } + + ExpandCopy(const torch::lazy::Value& self, const ::std::vector& size, const bool& implicit, std::vector&& shapes) + : TsNode( + ExpandCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, implicit)), + size(size), + implicit(implicit) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", implicit=" << implicit; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size, const bool& implicit) const { + size_t i = 0; + return (operand(i++) == self && + this->size == size && + this->implicit == implicit); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + kwarguments.emplace_back("implicit", implicit); + torch::lazy::TSOpVector expand_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(expand_copy_out.size(), 1); + + return expand_copy_out; + + } + + + ::std::vector size; + bool implicit; + + +}; + +class Flip : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::flip); + } + + Flip(const torch::lazy::Value& self, const ::std::vector& dims, std::vector&& shapes) + : TsNode( + Flip::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dims)), + dims(dims) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dims=" << dims; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& dims) const { + size_t i = 0; + return (operand(i++) == self && + this->dims == dims); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dims", dims); + + torch::lazy::TSOpVector flip_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(flip_out.size(), 1); + + return flip_out; + + } + + + ::std::vector dims; + + +}; + +class Floor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::floor); + } + + Floor(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Floor::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector floor_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(floor_out.size(), 1); + + return floor_out; + + } + + + + + +}; + +class Frac : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::frac); + } + + Frac(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Frac::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector frac_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(frac_out.size(), 1); + + return frac_out; + + } + + + + + +}; + +class Gather : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gather); + } + + Gather(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const bool& sparse_grad, std::vector&& shapes) + : TsNode( + Gather::ClassOpKind(), + OpList{self, index}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, sparse_grad)), + dim(dim), + sparse_grad(sparse_grad) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", sparse_grad=" << sparse_grad; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const bool& sparse_grad) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == index && + this->dim == dim && + this->sparse_grad == sparse_grad); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("sparse_grad", sparse_grad); + torch::lazy::TSOpVector gather_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gather_out.size(), 1); + + return gather_out; + + } + + + int64_t dim; + bool sparse_grad; + + +}; + +class GeScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ge); + } + + GeScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GeScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ge_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ge_out.size(), 1); + + return ge_out; + + } + + + + + +}; + +class GeTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ge); + } + + GeTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GeTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ge_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ge_out.size(), 1); + + return ge_out; + + } + + + + + +}; + +class Gelu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gelu); + } + + Gelu(const torch::lazy::Value& self, const c10::string_view& approximate, std::vector&& shapes) + : TsNode( + Gelu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(approximate)), + approximate(approximate) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", approximate=" << approximate; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const c10::string_view& approximate) const { + size_t i = 0; + return (operand(i++) == self && + this->approximate == approximate); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("approximate", approximate); + torch::lazy::TSOpVector gelu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gelu_out.size(), 1); + + return gelu_out; + + } + + + std::string approximate; + + +}; + +class GeluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gelu_backward); + } + + GeluBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const c10::string_view& approximate, std::vector&& shapes) + : TsNode( + GeluBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(approximate)), + approximate(approximate) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", approximate=" << approximate; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const c10::string_view& approximate) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + this->approximate == approximate); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("approximate", approximate); + torch::lazy::TSOpVector gelu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gelu_backward_out.size(), 1); + + return gelu_backward_out; + + } + + + std::string approximate; + + +}; + +class Glu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::glu); + } + + Glu(const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + Glu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector glu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(glu_out.size(), 1); + + return glu_out; + + } + + + int64_t dim; + + +}; + +class GluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::glu_backward); + } + + GluBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + GluBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector glu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(glu_backward_out.size(), 1); + + return glu_backward_out; + + } + + + int64_t dim; + + +}; + +class GluJvp : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::glu_jvp); + } + + GluJvp(const torch::lazy::Value& glu, const torch::lazy::Value& x, const torch::lazy::Value& dx, const int64_t& dim, std::vector&& shapes) + : TsNode( + GluJvp::ClassOpKind(), + OpList{glu, x, dx}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& glu, const torch::lazy::Value& x, const torch::lazy::Value& dx, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == glu && + operand(i++) == x && + operand(i++) == dx && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector glu_jvp_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(glu_jvp_out.size(), 1); + + return glu_jvp_out; + + } + + + int64_t dim; + + +}; + +class GridSampler2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::grid_sampler_2d); + } + + GridSampler2d(const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners, std::vector&& shapes) + : TsNode( + GridSampler2d::ClassOpKind(), + OpList{input, grid}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(interpolation_mode, padding_mode, align_corners)), + interpolation_mode(interpolation_mode), + padding_mode(padding_mode), + align_corners(align_corners) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", interpolation_mode=" << interpolation_mode; + ss << ", padding_mode=" << padding_mode; + ss << ", align_corners=" << align_corners; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners) const { + size_t i = 0; + return (operand(i++) == input && + operand(i++) == grid && + this->interpolation_mode == interpolation_mode && + this->padding_mode == padding_mode && + this->align_corners == align_corners); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("interpolation_mode", interpolation_mode); + arguments.emplace_back("padding_mode", padding_mode); + arguments.emplace_back("align_corners", align_corners); + + torch::lazy::TSOpVector grid_sampler_2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(grid_sampler_2d_out.size(), 1); + + return grid_sampler_2d_out; + + } + + + int64_t interpolation_mode; + int64_t padding_mode; + bool align_corners; + + +}; + +class GridSampler2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::grid_sampler_2d_backward); + } + + GridSampler2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + GridSampler2dBackward::ClassOpKind(), + OpList{grad_output, input, grid}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(interpolation_mode, padding_mode, align_corners, output_mask)), + interpolation_mode(interpolation_mode), + padding_mode(padding_mode), + align_corners(align_corners), + output_mask(output_mask) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", interpolation_mode=" << interpolation_mode; + ss << ", padding_mode=" << padding_mode; + ss << ", align_corners=" << align_corners; + ss << ", output_mask=" << output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == input && + operand(i++) == grid && + this->interpolation_mode == interpolation_mode && + this->padding_mode == padding_mode && + this->align_corners == align_corners && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("interpolation_mode", interpolation_mode); + arguments.emplace_back("padding_mode", padding_mode); + arguments.emplace_back("align_corners", align_corners); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector grid_sampler_2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(grid_sampler_2d_backward_out.size(), 2); + + return grid_sampler_2d_backward_out; + + } + + + int64_t interpolation_mode; + int64_t padding_mode; + bool align_corners; + ::std::vector output_mask; + + +}; + +class GtScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gt); + } + + GtScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GtScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector gt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gt_out.size(), 1); + + return gt_out; + + } + + + + + +}; + +class GtTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gt); + } + + GtTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GtTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector gt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gt_out.size(), 1); + + return gt_out; + + } + + + + + +}; + +class Hardsigmoid : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::hardsigmoid); + } + + Hardsigmoid(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Hardsigmoid::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector hardsigmoid_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(hardsigmoid_out.size(), 1); + + return hardsigmoid_out; + + } + + + + + +}; + +class IndexSelect : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::index_select); + } + + IndexSelect(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, std::vector&& shapes) + : TsNode( + IndexSelect::ClassOpKind(), + OpList{self, index}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == index && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector index_select_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(index_select_out.size(), 1); + + return index_select_out; + + } + + + int64_t dim; + + +}; + +class LeScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::le); + } + + LeScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LeScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector le_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(le_out.size(), 1); + + return le_out; + + } + + + + + +}; + +class LeTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::le); + } + + LeTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LeTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector le_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(le_out.size(), 1); + + return le_out; + + } + + + + + +}; + +class LeakyRelu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::leaky_relu); + } + + LeakyRelu(const torch::lazy::Value& self, const torch::lazy::Value& negative_slope, std::vector&& shapes) + : TsNode( + LeakyRelu::ClassOpKind(), + OpList{self, negative_slope}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& negative_slope) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == negative_slope); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector leaky_relu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(leaky_relu_out.size(), 1); + + return leaky_relu_out; + + } + + + + + +}; + +class LeakyReluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::leaky_relu_backward); + } + + LeakyReluBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& negative_slope, const bool& self_is_result, std::vector&& shapes) + : TsNode( + LeakyReluBackward::ClassOpKind(), + OpList{grad_output, self, negative_slope}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(self_is_result)), + self_is_result(self_is_result) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", self_is_result=" << self_is_result; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& negative_slope, const bool& self_is_result) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == negative_slope && + this->self_is_result == self_is_result); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("self_is_result", self_is_result); + + torch::lazy::TSOpVector leaky_relu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(leaky_relu_backward_out.size(), 1); + + return leaky_relu_backward_out; + + } + + + bool self_is_result; + + +}; + +class Log : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log); + } + + Log(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Log::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log_out.size(), 1); + + return log_out; + + } + + + + + +}; + +class Log2 : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log2); + } + + Log2(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Log2::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log2_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log2_out.size(), 1); + + return log2_out; + + } + + + + + +}; + +class LogSigmoidBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log_sigmoid_backward); + } + + LogSigmoidBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& buffer, std::vector&& shapes) + : TsNode( + LogSigmoidBackward::ClassOpKind(), + OpList{grad_output, self, buffer}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& buffer) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == buffer); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log_sigmoid_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log_sigmoid_backward_out.size(), 1); + + return log_sigmoid_backward_out; + + } + + + + + +}; + +class LogSigmoidForward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log_sigmoid_forward); + } + + LogSigmoidForward(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + LogSigmoidForward::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log_sigmoid_forward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log_sigmoid_forward_out.size(), 2); + + return log_sigmoid_forward_out; + + } + + + + + +}; + +class Logdet : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::logdet); + } + + Logdet(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Logdet::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector logdet_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(logdet_out.size(), 1); + + return logdet_out; + + } + + + + + +}; + +class LtScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::lt); + } + + LtScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LtScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector lt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(lt_out.size(), 1); + + return lt_out; + + } + + + + + +}; + +class LtTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::lt); + } + + LtTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LtTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector lt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(lt_out.size(), 1); + + return lt_out; + + } + + + + + +}; + +class MaskedFillScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::masked_fill); + } + + MaskedFillScalar(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + MaskedFillScalar::ClassOpKind(), + OpList{self, mask, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mask && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector masked_fill_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(masked_fill_out.size(), 1); + + return masked_fill_out; + + } + + + + + +}; + +class MaskedFillTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::masked_fill); + } + + MaskedFillTensor(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + MaskedFillTensor::ClassOpKind(), + OpList{self, mask, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mask && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector masked_fill_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(masked_fill_out.size(), 1); + + return masked_fill_out; + + } + + + + + +}; + +class MaxDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::max); + } + + MaxDim(const torch::lazy::Value& self, const int64_t& dim, const bool& keepdim, std::vector&& shapes) + : TsNode( + MaxDim::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(dim, keepdim)), + dim(dim), + keepdim(keepdim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + + torch::lazy::TSOpVector max_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(max_out.size(), 2); + + return max_out; + + } + + + int64_t dim; + bool keepdim; + + +}; + +class Max : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::max); + } + + Max(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Max::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector max_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(max_out.size(), 1); + + return max_out; + + } + + + + + +}; + +class MaxPool2dWithIndices : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::max_pool2d_with_indices); + } + + MaxPool2dWithIndices(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode, std::vector&& shapes) + : TsNode( + MaxPool2dWithIndices::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(kernel_size, stride, padding, dilation, ceil_mode)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + dilation(dilation), + ceil_mode(ceil_mode) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", ceil_mode=" << ceil_mode; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode) const { + size_t i = 0; + return (operand(i++) == self && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->ceil_mode == ceil_mode); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("ceil_mode", ceil_mode); + + torch::lazy::TSOpVector max_pool2d_with_indices_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(max_pool2d_with_indices_out.size(), 2); + + return max_pool2d_with_indices_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool ceil_mode; + + +}; + +class MaxPool2dWithIndicesBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::max_pool2d_with_indices_backward); + } + + MaxPool2dWithIndicesBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode, const torch::lazy::Value& indices, std::vector&& shapes) + : TsNode( + MaxPool2dWithIndicesBackward::ClassOpKind(), + OpList{grad_output, self, indices}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(kernel_size, stride, padding, dilation, ceil_mode)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + dilation(dilation), + ceil_mode(ceil_mode) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", ceil_mode=" << ceil_mode; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode, const torch::lazy::Value& indices) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == indices && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->ceil_mode == ceil_mode); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("ceil_mode", ceil_mode); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector max_pool2d_with_indices_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(max_pool2d_with_indices_backward_out.size(), 1); + + return max_pool2d_with_indices_backward_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool ceil_mode; + + +}; + +class Maximum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::maximum); + } + + Maximum(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + Maximum::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector maximum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(maximum_out.size(), 1); + + return maximum_out; + + } + + + + + +}; + +class Mean : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mean); + } + + Mean(const torch::lazy::Value& self, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + Mean::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dtype)), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector mean_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mean_out.size(), 1); + + return mean_out; + + } + + + ::std::optional dtype; + + +}; + +class MeanDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mean); + } + + MeanDim(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + MeanDim::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim, dtype)), + dim(dim), + keepdim(keepdim), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } + ss << ", keepdim=" << keepdim; + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->keepdim == keepdim && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector mean_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mean_out.size(), 1); + + return mean_out; + + } + + + ::std::optional<::std::vector> dim; + bool keepdim; + ::std::optional dtype; + + +}; + +class Min : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::min); + } + + Min(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Min::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector min_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(min_out.size(), 1); + + return min_out; + + } + + + + + +}; + +class Minimum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::minimum); + } + + Minimum(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + Minimum::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector minimum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(minimum_out.size(), 1); + + return minimum_out; + + } + + + + + +}; + +class Mm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mm); + } + + Mm(const torch::lazy::Value& self, const torch::lazy::Value& mat2, std::vector&& shapes) + : TsNode( + Mm::ClassOpKind(), + OpList{self, mat2}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mat2) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mat2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector mm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mm_out.size(), 1); + + return mm_out; + + } + + + + + +}; + +class MulTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mul); + } + + MulTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + MulTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector mul_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mul_out.size(), 1); + + return mul_out; + + } + + + + + +}; + +class Mv : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mv); + } + + Mv(const torch::lazy::Value& self, const torch::lazy::Value& vec, std::vector&& shapes) + : TsNode( + Mv::ClassOpKind(), + OpList{self, vec}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& vec) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == vec); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector mv_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mv_out.size(), 1); + + return mv_out; + + } + + + + + +}; + +class NativeBatchNorm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_batch_norm); + } + + NativeBatchNorm(const torch::lazy::Value& input, const ::std::optional& weight, const ::std::optional& bias, const ::std::optional& running_mean, const ::std::optional& running_var, const bool& training, const double& momentum, const double& eps, std::vector&& shapes) + : TsNode( + NativeBatchNorm::ClassOpKind(), + OpList{input, weight.value_or(kNullValue), bias.value_or(kNullValue), running_mean.value_or(kNullValue), running_var.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(training, momentum, eps)), + training(training), + momentum(momentum), + eps(eps) + { + has_weight = !!weight; + has_bias = !!bias; + has_running_mean = !!running_mean; + has_running_var = !!running_var; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", training=" << training; + ss << ", momentum=" << momentum; + ss << ", eps=" << eps; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const ::std::optional& weight, const ::std::optional& bias, const ::std::optional& running_mean, const ::std::optional& running_var, const bool& training, const double& momentum, const double& eps) const { + size_t i = 0; + return (operand(i++) == input && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == bias.value_or(kNullValue) && + nullable_operand(i++) == running_mean.value_or(kNullValue) && + nullable_operand(i++) == running_var.value_or(kNullValue) && + this->training == training && + this->momentum == momentum && + this->eps == eps); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_bias ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_mean ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_var ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("training", training); + arguments.emplace_back("momentum", momentum); + arguments.emplace_back("eps", eps); + + torch::lazy::TSOpVector native_batch_norm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_batch_norm_out.size(), 3); + + return native_batch_norm_out; + + } + + + bool training; + double momentum; + double eps; + bool has_weight: 1; + bool has_bias: 1; + bool has_running_mean: 1; + bool has_running_var: 1; + +}; + +class NativeBatchNormBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_batch_norm_backward); + } + + NativeBatchNormBackward(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::optional& weight, const ::std::optional& running_mean, const ::std::optional& running_var, const ::std::optional& save_mean, const ::std::optional& save_invstd, const bool& train, const double& eps, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + NativeBatchNormBackward::ClassOpKind(), + OpList{grad_out, input, weight.value_or(kNullValue), running_mean.value_or(kNullValue), running_var.value_or(kNullValue), save_mean.value_or(kNullValue), save_invstd.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(train, eps, output_mask)), + train(train), + eps(eps), + output_mask(output_mask) + { + has_weight = !!weight; + has_running_mean = !!running_mean; + has_running_var = !!running_var; + has_save_mean = !!save_mean; + has_save_invstd = !!save_invstd; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", train=" << train; + ss << ", eps=" << eps; + ss << ", output_mask=" << output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::optional& weight, const ::std::optional& running_mean, const ::std::optional& running_var, const ::std::optional& save_mean, const ::std::optional& save_invstd, const bool& train, const double& eps, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_out && + operand(i++) == input && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == running_mean.value_or(kNullValue) && + nullable_operand(i++) == running_var.value_or(kNullValue) && + nullable_operand(i++) == save_mean.value_or(kNullValue) && + nullable_operand(i++) == save_invstd.value_or(kNullValue) && + this->train == train && + this->eps == eps && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(10); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_mean ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_var ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_save_mean ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_save_invstd ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("train", train); + arguments.emplace_back("eps", eps); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector native_batch_norm_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_batch_norm_backward_out.size(), 3); + + return native_batch_norm_backward_out; + + } + + + bool train; + double eps; + ::std::vector output_mask; + bool has_weight: 1; + bool has_running_mean: 1; + bool has_running_var: 1; + bool has_save_mean: 1; + bool has_save_invstd: 1; + +}; + +class NativeDropout : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_dropout); + } + + NativeDropout(const torch::lazy::Value& input, const double& p, const ::std::optional& train, std::vector&& shapes) + : TsNode( + NativeDropout::ClassOpKind(), + OpList{input}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(p, train)), + p(p), + train(train) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", p=" << p; + if (train.has_value()) { + ss << ", train=" << train.value(); + } else { + ss << ", train=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const double& p, const ::std::optional& train) const { + size_t i = 0; + return (operand(i++) == input && + this->p == p && + ((!this->train&&!train) || (this->train&&train && *(this->train) == *train))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("p", p); + arguments.emplace_back("train", train); + + torch::lazy::TSOpVector native_dropout_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_dropout_out.size(), 2); + + return native_dropout_out; + + } + + + double p; + ::std::optional train; + + +}; + +class NativeDropoutBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_dropout_backward); + } + + NativeDropoutBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& mask, const double& scale, std::vector&& shapes) + : TsNode( + NativeDropoutBackward::ClassOpKind(), + OpList{grad_output, mask}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(scale)), + scale(scale) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", scale=" << scale; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& mask, const double& scale) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == mask && + this->scale == scale); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("scale", scale); + + torch::lazy::TSOpVector native_dropout_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_dropout_backward_out.size(), 1); + + return native_dropout_backward_out; + + } + + + double scale; + + +}; + +class NativeLayerNorm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_layer_norm); + } + + NativeLayerNorm(const torch::lazy::Value& input, const ::std::vector& normalized_shape, const ::std::optional& weight, const ::std::optional& bias, const double& eps, std::vector&& shapes) + : TsNode( + NativeLayerNorm::ClassOpKind(), + OpList{input, weight.value_or(kNullValue), bias.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(normalized_shape, eps)), + normalized_shape(normalized_shape), + eps(eps) + { + has_weight = !!weight; + has_bias = !!bias; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", normalized_shape=" << normalized_shape; + ss << ", eps=" << eps; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const ::std::vector& normalized_shape, const ::std::optional& weight, const ::std::optional& bias, const double& eps) const { + size_t i = 0; + return (operand(i++) == input && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == bias.value_or(kNullValue) && + this->normalized_shape == normalized_shape && + this->eps == eps); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("normalized_shape", normalized_shape); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_bias ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("eps", eps); + + torch::lazy::TSOpVector native_layer_norm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_layer_norm_out.size(), 3); + + return native_layer_norm_out; + + } + + + ::std::vector normalized_shape; + double eps; + bool has_weight: 1; + bool has_bias: 1; + +}; + +class NativeLayerNormBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_layer_norm_backward); + } + + NativeLayerNormBackward(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::vector& normalized_shape, const torch::lazy::Value& mean, const torch::lazy::Value& rstd, const ::std::optional& weight, const ::std::optional& bias, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + NativeLayerNormBackward::ClassOpKind(), + OpList{grad_out, input, mean, rstd, weight.value_or(kNullValue), bias.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(normalized_shape, output_mask)), + normalized_shape(normalized_shape), + output_mask(output_mask) + { + has_weight = !!weight; + has_bias = !!bias; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", normalized_shape=" << normalized_shape; + ss << ", output_mask=" << output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::vector& normalized_shape, const torch::lazy::Value& mean, const torch::lazy::Value& rstd, const ::std::optional& weight, const ::std::optional& bias, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_out && + operand(i++) == input && + operand(i++) == mean && + operand(i++) == rstd && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == bias.value_or(kNullValue) && + this->normalized_shape == normalized_shape && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("normalized_shape", normalized_shape); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_bias ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector native_layer_norm_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_layer_norm_backward_out.size(), 3); + + return native_layer_norm_backward_out; + + } + + + ::std::vector normalized_shape; + ::std::vector output_mask; + bool has_weight: 1; + bool has_bias: 1; + +}; + +class NeScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ne); + } + + NeScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + NeScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ne_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ne_out.size(), 1); + + return ne_out; + + } + + + + + +}; + +class NeTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ne); + } + + NeTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + NeTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ne_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ne_out.size(), 1); + + return ne_out; + + } + + + + + +}; + +class Neg : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::neg); + } + + Neg(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Neg::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector neg_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(neg_out.size(), 1); + + return neg_out; + + } + + + + + +}; + +class NllLoss2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss2d_backward); + } + + NllLoss2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight, std::vector&& shapes) + : TsNode( + NllLoss2dBackward::ClassOpKind(), + OpList{grad_output, self, target, weight.value_or(kNullValue), total_weight}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + operand(i++) == total_weight && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector nll_loss2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss2d_backward_out.size(), 1); + + return nll_loss2d_backward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class NllLoss2dForward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss2d_forward); + } + + NllLoss2dForward(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, std::vector&& shapes) + : TsNode( + NllLoss2dForward::ClassOpKind(), + OpList{self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + + torch::lazy::TSOpVector nll_loss2d_forward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss2d_forward_out.size(), 2); + + return nll_loss2d_forward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class NllLossBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss_backward); + } + + NllLossBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight, std::vector&& shapes) + : TsNode( + NllLossBackward::ClassOpKind(), + OpList{grad_output, self, target, weight.value_or(kNullValue), total_weight}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + operand(i++) == total_weight && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector nll_loss_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss_backward_out.size(), 1); + + return nll_loss_backward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class NllLossForward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss_forward); + } + + NllLossForward(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, std::vector&& shapes) + : TsNode( + NllLossForward::ClassOpKind(), + OpList{self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + + torch::lazy::TSOpVector nll_loss_forward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss_forward_out.size(), 2); + + return nll_loss_forward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class Nonzero : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nonzero); + } + + Nonzero(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Nonzero::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector nonzero_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nonzero_out.size(), 1); + + return nonzero_out; + + } + + + + + +}; + +class NormScalaroptDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::norm); + } + + NormScalaroptDim(const torch::lazy::Value& self, const ::std::optional& p, const ::std::vector& dim, const bool& keepdim, std::vector&& shapes) + : TsNode( + NormScalaroptDim::ClassOpKind(), + OpList{self, p.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim)), + dim(dim), + keepdim(keepdim) + { + has_p = !!p; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& p, const ::std::vector& dim, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == p.value_or(kNullValue) && + this->dim == dim && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_p ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + + torch::lazy::TSOpVector norm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(norm_out.size(), 1); + + return norm_out; + + } + + + ::std::vector dim; + bool keepdim; + bool has_p: 1; + +}; + +class NormalFunctional : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::normal_functional); + } + + NormalFunctional(const torch::lazy::Value& self, const double& mean, const double& std, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + NormalFunctional::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(mean, std, generator)), + mean(mean), + std(std), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", mean=" << mean; + ss << ", std=" << std; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const double& mean, const double& std, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->mean == mean && + this->std == std && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("mean", mean); + arguments.emplace_back("std", std); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector normal_functional_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(normal_functional_out.size(), 1); + + return normal_functional_out; + + } + + + double mean; + double std; + ::std::optional generator; + + +}; + +class PermuteCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::permute_copy); + } + + PermuteCopy(const torch::lazy::Value& self, const ::std::vector& dims, std::vector&& shapes) + : TsNode( + PermuteCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dims)), + dims(dims) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dims=" << dims; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& dims) const { + size_t i = 0; + return (operand(i++) == self && + this->dims == dims); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dims", dims); + + torch::lazy::TSOpVector permute_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(permute_copy_out.size(), 1); + + return permute_copy_out; + + } + + + ::std::vector dims; + + +}; + +class PowTensorTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::pow); + } + + PowTensorTensor(const torch::lazy::Value& self, const torch::lazy::Value& exponent, std::vector&& shapes) + : TsNode( + PowTensorTensor::ClassOpKind(), + OpList{self, exponent}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& exponent) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == exponent); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector pow_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(pow_out.size(), 1); + + return pow_out; + + } + + + + + +}; + +class PowTensorScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::pow); + } + + PowTensorScalar(const torch::lazy::Value& self, const torch::lazy::Value& exponent, std::vector&& shapes) + : TsNode( + PowTensorScalar::ClassOpKind(), + OpList{self, exponent}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& exponent) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == exponent); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector pow_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(pow_out.size(), 1); + + return pow_out; + + } + + + + + +}; + +class RandomFrom : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::random); + } + + RandomFrom(const torch::lazy::Value& self, const int64_t& from, const ::std::optional& to, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + RandomFrom::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(from, to, generator)), + from(from), + to(to), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", from=" << from; + if (to.has_value()) { + ss << ", to=" << to.value(); + } else { + ss << ", to=null"; + } + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& from, const ::std::optional& to, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->from == from && + ((!this->to&&!to) || (this->to&&to && *(this->to) == *to)) && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("from", from); + arguments.emplace_back("to", to); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector random_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(random_out.size(), 1); + + return random_out; + + } + + + int64_t from; + ::std::optional to; + ::std::optional generator; + + +}; + +class RandomTo : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::random); + } + + RandomTo(const torch::lazy::Value& self, const int64_t& to, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + RandomTo::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(to, generator)), + to(to), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", to=" << to; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& to, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->to == to && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("to", to); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector random_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(random_out.size(), 1); + + return random_out; + + } + + + int64_t to; + ::std::optional generator; + + +}; + +class Random : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::random); + } + + Random(const torch::lazy::Value& self, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + Random::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(generator)), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector random_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(random_out.size(), 1); + + return random_out; + + } + + + ::std::optional generator; + + +}; + +class Reciprocal : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::reciprocal); + } + + Reciprocal(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Reciprocal::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector reciprocal_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(reciprocal_out.size(), 1); + + return reciprocal_out; + + } + + + + + +}; + +class Relu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::relu); + } + + Relu(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Relu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector relu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(relu_out.size(), 1); + + return relu_out; + + } + + + + + +}; + +class RemainderTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::remainder); + } + + RemainderTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + RemainderTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector remainder_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(remainder_out.size(), 1); + + return remainder_out; + + } + + + + + +}; + +class Repeat : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::repeat); + } + + Repeat(const torch::lazy::Value& self, const ::std::vector& repeats, std::vector&& shapes) + : TsNode( + Repeat::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(repeats)), + repeats(repeats) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", repeats=" << repeats; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& repeats) const { + size_t i = 0; + return (operand(i++) == self && + this->repeats == repeats); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("repeats", repeats); + + torch::lazy::TSOpVector repeat_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(repeat_out.size(), 1); + + return repeat_out; + + } + + + ::std::vector repeats; + + +}; + +class Rsqrt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::rsqrt); + } + + Rsqrt(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Rsqrt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector rsqrt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(rsqrt_out.size(), 1); + + return rsqrt_out; + + } + + + + + +}; + +class ScatterAdd : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::scatter_add); + } + + ScatterAdd(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const torch::lazy::Value& src, std::vector&& shapes) + : TsNode( + ScatterAdd::ClassOpKind(), + OpList{self, index, src}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const torch::lazy::Value& src) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == index && + operand(i++) == src && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector scatter_add_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(scatter_add_out.size(), 1); + + return scatter_add_out; + + } + + + int64_t dim; + + +}; + +class SelectCopyInt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::select_copy); + } + + SelectCopyInt(const torch::lazy::Value& self, const int64_t& dim, const int64_t& index, std::vector&& shapes) + : TsNode( + SelectCopyInt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, index)), + dim(dim), + index(index) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", index=" << index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const int64_t& index) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->index == index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("index", index); + + torch::lazy::TSOpVector select_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(select_copy_out.size(), 1); + + return select_copy_out; + + } + + + int64_t dim; + int64_t index; + + +}; + +class SelectScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::select_scatter); + } + + SelectScatter(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const int64_t& index, std::vector&& shapes) + : TsNode( + SelectScatter::ClassOpKind(), + OpList{self, src}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, index)), + dim(dim), + index(index) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", index=" << index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const int64_t& index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + this->dim == dim && + this->index == index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("index", index); + + torch::lazy::TSOpVector select_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(select_scatter_out.size(), 1); + + return select_scatter_out; + + } + + + int64_t dim; + int64_t index; + + +}; + +class Selu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::selu); + } + + Selu(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Selu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector selu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(selu_out.size(), 1); + + return selu_out; + + } + + + + + +}; + +class Sgn : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sgn); + } + + Sgn(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Sgn::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sgn_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sgn_out.size(), 1); + + return sgn_out; + + } + + + + + +}; + +class Sigmoid : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sigmoid); + } + + Sigmoid(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Sigmoid::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sigmoid_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sigmoid_out.size(), 1); + + return sigmoid_out; + + } + + + + + +}; + +class SigmoidBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(c10::Symbol::fromQualString("aten::sigmoid_backward")); + } + + SigmoidBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, std::vector&& shapes) + : TsNode( + SigmoidBackward::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& output) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sigmoid_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sigmoid_backward_out.size(), 1); + + return sigmoid_backward_out; + + } + + + + + +}; + +class Silu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::silu); + } + + Silu(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Silu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector silu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(silu_out.size(), 1); + + return silu_out; + + } + + + + + +}; + +class SliceCopyTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::slice_copy); + } + + SliceCopyTensor(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step, std::vector&& shapes) + : TsNode( + SliceCopyTensor::ClassOpKind(), + OpList{self, start.value_or(kNullValue), end.value_or(kNullValue), step}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + has_start = !!start; + has_end = !!end; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == start.value_or(kNullValue) && + nullable_operand(i++) == end.value_or(kNullValue) && + operand(i++) == step && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(has_start ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_end ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector slice_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(slice_copy_out.size(), 1); + + return slice_copy_out; + + } + + + int64_t dim; + bool has_start: 1; + bool has_end: 1; + +}; + +class SliceScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::slice_scatter); + } + + SliceScatter(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step, std::vector&& shapes) + : TsNode( + SliceScatter::ClassOpKind(), + OpList{self, src, start.value_or(kNullValue), end.value_or(kNullValue), step}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + has_start = !!start; + has_end = !!end; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + nullable_operand(i++) == start.value_or(kNullValue) && + nullable_operand(i++) == end.value_or(kNullValue) && + operand(i++) == step && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(has_start ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_end ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector slice_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(slice_scatter_out.size(), 1); + + return slice_scatter_out; + + } + + + int64_t dim; + bool has_start: 1; + bool has_end: 1; + +}; + +class SmoothL1Loss : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::smooth_l1_loss); + } + + SmoothL1Loss(const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta, std::vector&& shapes) + : TsNode( + SmoothL1Loss::ClassOpKind(), + OpList{self, target}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, beta)), + reduction(reduction), + beta(beta) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", beta=" << beta; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + this->reduction == reduction && + this->beta == beta); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("beta", beta); + + torch::lazy::TSOpVector smooth_l1_loss_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(smooth_l1_loss_out.size(), 1); + + return smooth_l1_loss_out; + + } + + + int64_t reduction; + double beta; + + +}; + +class SmoothL1LossBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::smooth_l1_loss_backward); + } + + SmoothL1LossBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta, std::vector&& shapes) + : TsNode( + SmoothL1LossBackward::ClassOpKind(), + OpList{grad_output, self, target}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, beta)), + reduction(reduction), + beta(beta) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", beta=" << beta; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == target && + this->reduction == reduction && + this->beta == beta); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("beta", beta); + + torch::lazy::TSOpVector smooth_l1_loss_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(smooth_l1_loss_backward_out.size(), 1); + + return smooth_l1_loss_backward_out; + + } + + + int64_t reduction; + double beta; + + +}; + +class Softplus : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::softplus); + } + + Softplus(const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold, std::vector&& shapes) + : TsNode( + Softplus::ClassOpKind(), + OpList{self, beta, threshold}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == beta && + operand(i++) == threshold); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector softplus_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(softplus_out.size(), 1); + + return softplus_out; + + } + + + + + +}; + +class SoftplusBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::softplus_backward); + } + + SoftplusBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold, std::vector&& shapes) + : TsNode( + SoftplusBackward::ClassOpKind(), + OpList{grad_output, self, beta, threshold}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == beta && + operand(i++) == threshold); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector softplus_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(softplus_backward_out.size(), 1); + + return softplus_backward_out; + + } + + + + + +}; + +class Sort : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sort); + } + + Sort(const torch::lazy::Value& self, const int64_t& dim, const bool& descending, std::vector&& shapes) + : TsNode( + Sort::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(dim, descending)), + dim(dim), + descending(descending) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", descending=" << descending; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const bool& descending) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->descending == descending); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("descending", descending); + + torch::lazy::TSOpVector sort_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sort_out.size(), 2); + + return sort_out; + + } + + + int64_t dim; + bool descending; + + +}; + +class Sqrt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sqrt); + } + + Sqrt(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Sqrt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sqrt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sqrt_out.size(), 1); + + return sqrt_out; + + } + + + + + +}; + +class SqueezeCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::squeeze_copy); + } + + SqueezeCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + SqueezeCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector squeeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(squeeze_copy_out.size(), 1); + + return squeeze_copy_out; + + } + + + + + +}; + +class SqueezeCopyDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::squeeze_copy); + } + + SqueezeCopyDim(const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + SqueezeCopyDim::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector squeeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(squeeze_copy_out.size(), 1); + + return squeeze_copy_out; + + } + + + int64_t dim; + + +}; + +class SqueezeCopyDims : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::squeeze_copy); + } + + SqueezeCopyDims(const torch::lazy::Value& self, const ::std::vector& dim, std::vector&& shapes) + : TsNode( + SqueezeCopyDims::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector squeeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(squeeze_copy_out.size(), 1); + + return squeeze_copy_out; + + } + + + ::std::vector dim; + + +}; + +class Stack : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::stack); + } + + Stack(const torch::lazy::Value& tensors, const int64_t& dim, std::vector&& shapes) + : TsNode( + Stack::ClassOpKind(), + OpList{tensors}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& tensors, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == tensors && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector stack_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(stack_out.size(), 1); + + return stack_out; + + } + + + int64_t dim; + + +}; + +class Std : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::std); + } + + Std(const torch::lazy::Value& self, const bool& unbiased, std::vector&& shapes) + : TsNode( + Std::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(unbiased)), + unbiased(unbiased) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", unbiased=" << unbiased; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const bool& unbiased) const { + size_t i = 0; + return (operand(i++) == self && + this->unbiased == unbiased); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("unbiased", unbiased); + + torch::lazy::TSOpVector std_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(std_out.size(), 1); + + return std_out; + + } + + + bool unbiased; + + +}; + +class StdDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::std); + } + + StdDim(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& unbiased, const bool& keepdim, std::vector&& shapes) + : TsNode( + StdDim::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, unbiased, keepdim)), + dim(dim), + unbiased(unbiased), + keepdim(keepdim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } + ss << ", unbiased=" << unbiased; + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& unbiased, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->unbiased == unbiased && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("unbiased", unbiased); + arguments.emplace_back("keepdim", keepdim); + + torch::lazy::TSOpVector std_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(std_out.size(), 1); + + return std_out; + + } + + + ::std::optional<::std::vector> dim; + bool unbiased; + bool keepdim; + + +}; + +class StdCorrection : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::std); + } + + StdCorrection(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const ::std::optional& correction, const bool& keepdim, std::vector&& shapes) + : TsNode( + StdCorrection::ClassOpKind(), + OpList{self, correction.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim)), + dim(dim), + keepdim(keepdim) + { + has_correction = !!correction; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const ::std::optional& correction, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == correction.value_or(kNullValue) && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(2); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + kwarguments.emplace_back("correction", has_correction ? loctx->GetOutputOp(operand(i++)) : nullptr); + kwarguments.emplace_back("keepdim", keepdim); + torch::lazy::TSOpVector std_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(std_out.size(), 1); + + return std_out; + + } + + + ::std::optional<::std::vector> dim; + bool keepdim; + bool has_correction: 1; + +}; + +class SubTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sub); + } + + SubTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + SubTensor::ClassOpKind(), + OpList{self, other, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector sub_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sub_out.size(), 1); + + return sub_out; + + } + + + + + +}; + +class Sum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sum); + } + + Sum(const torch::lazy::Value& self, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + Sum::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dtype)), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector sum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sum_out.size(), 1); + + return sum_out; + + } + + + ::std::optional dtype; + + +}; + +class SumDimIntlist : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sum); + } + + SumDimIntlist(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + SumDimIntlist::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim, dtype)), + dim(dim), + keepdim(keepdim), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } + ss << ", keepdim=" << keepdim; + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->keepdim == keepdim && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector sum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sum_out.size(), 1); + + return sum_out; + + } + + + ::std::optional<::std::vector> dim; + bool keepdim; + ::std::optional dtype; + + +}; + +class TCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::t_copy); + } + + TCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + TCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector t_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(t_copy_out.size(), 1); + + return t_copy_out; + + } + + + + + +}; + +class Tanh : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::tanh); + } + + Tanh(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Tanh::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector tanh_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(tanh_out.size(), 1); + + return tanh_out; + + } + + + + + +}; + +class TanhBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::tanh_backward); + } + + TanhBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, std::vector&& shapes) + : TsNode( + TanhBackward::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& output) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector tanh_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(tanh_backward_out.size(), 1); + + return tanh_backward_out; + + } + + + + + +}; + +class Threshold : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::threshold); + } + + Threshold(const torch::lazy::Value& self, const torch::lazy::Value& threshold, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + Threshold::ClassOpKind(), + OpList{self, threshold, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& threshold, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == threshold && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector threshold_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(threshold_out.size(), 1); + + return threshold_out; + + } + + + + + +}; + +class ThresholdBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::threshold_backward); + } + + ThresholdBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& threshold, std::vector&& shapes) + : TsNode( + ThresholdBackward::ClassOpKind(), + OpList{grad_output, self, threshold}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& threshold) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == threshold); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector threshold_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(threshold_backward_out.size(), 1); + + return threshold_backward_out; + + } + + + + + +}; + +class Topk : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::topk); + } + + Topk(const torch::lazy::Value& self, const int64_t& k, const int64_t& dim, const bool& largest, const bool& sorted, std::vector&& shapes) + : TsNode( + Topk::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(k, dim, largest, sorted)), + k(k), + dim(dim), + largest(largest), + sorted(sorted) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", k=" << k; + ss << ", dim=" << dim; + ss << ", largest=" << largest; + ss << ", sorted=" << sorted; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& k, const int64_t& dim, const bool& largest, const bool& sorted) const { + size_t i = 0; + return (operand(i++) == self && + this->k == k && + this->dim == dim && + this->largest == largest && + this->sorted == sorted); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("k", k); + arguments.emplace_back("dim", dim); + arguments.emplace_back("largest", largest); + arguments.emplace_back("sorted", sorted); + + torch::lazy::TSOpVector topk_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(topk_out.size(), 2); + + return topk_out; + + } + + + int64_t k; + int64_t dim; + bool largest; + bool sorted; + + +}; + +class Trace : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::trace); + } + + Trace(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Trace::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector trace_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(trace_out.size(), 1); + + return trace_out; + + } + + + + + +}; + +class TransposeCopyInt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::transpose_copy); + } + + TransposeCopyInt(const torch::lazy::Value& self, const int64_t& dim0, const int64_t& dim1, std::vector&& shapes) + : TsNode( + TransposeCopyInt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim0, dim1)), + dim0(dim0), + dim1(dim1) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim0=" << dim0; + ss << ", dim1=" << dim1; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim0, const int64_t& dim1) const { + size_t i = 0; + return (operand(i++) == self && + this->dim0 == dim0 && + this->dim1 == dim1); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim0", dim0); + arguments.emplace_back("dim1", dim1); + + torch::lazy::TSOpVector transpose_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(transpose_copy_out.size(), 1); + + return transpose_copy_out; + + } + + + int64_t dim0; + int64_t dim1; + + +}; + +class Tril : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::tril); + } + + Tril(const torch::lazy::Value& self, const int64_t& diagonal, std::vector&& shapes) + : TsNode( + Tril::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(diagonal)), + diagonal(diagonal) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", diagonal=" << diagonal; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& diagonal) const { + size_t i = 0; + return (operand(i++) == self && + this->diagonal == diagonal); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("diagonal", diagonal); + + torch::lazy::TSOpVector tril_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(tril_out.size(), 1); + + return tril_out; + + } + + + int64_t diagonal; + + +}; + +class Triu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::triu); + } + + Triu(const torch::lazy::Value& self, const int64_t& diagonal, std::vector&& shapes) + : TsNode( + Triu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(diagonal)), + diagonal(diagonal) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", diagonal=" << diagonal; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& diagonal) const { + size_t i = 0; + return (operand(i++) == self && + this->diagonal == diagonal); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("diagonal", diagonal); + + torch::lazy::TSOpVector triu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(triu_out.size(), 1); + + return triu_out; + + } + + + int64_t diagonal; + + +}; + +class Trunc : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::trunc); + } + + Trunc(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Trunc::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector trunc_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(trunc_out.size(), 1); + + return trunc_out; + + } + + + + + +}; + +class UnfoldCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::unfold_copy); + } + + UnfoldCopy(const torch::lazy::Value& self, const int64_t& dimension, const int64_t& size, const int64_t& step, std::vector&& shapes) + : TsNode( + UnfoldCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dimension, size, step)), + dimension(dimension), + size(size), + step(step) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dimension=" << dimension; + ss << ", size=" << size; + ss << ", step=" << step; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dimension, const int64_t& size, const int64_t& step) const { + size_t i = 0; + return (operand(i++) == self && + this->dimension == dimension && + this->size == size && + this->step == step); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dimension", dimension); + arguments.emplace_back("size", size); + arguments.emplace_back("step", step); + + torch::lazy::TSOpVector unfold_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(unfold_copy_out.size(), 1); + + return unfold_copy_out; + + } + + + int64_t dimension; + int64_t size; + int64_t step; + + +}; + +class Uniform : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::uniform); + } + + Uniform(const torch::lazy::Value& self, const double& from, const double& to, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + Uniform::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(from, to, generator)), + from(from), + to(to), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", from=" << from; + ss << ", to=" << to; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const double& from, const double& to, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->from == from && + this->to == to && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("from", from); + arguments.emplace_back("to", to); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector uniform_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(uniform_out.size(), 1); + + return uniform_out; + + } + + + double from; + double to; + ::std::optional generator; + + +}; + +class UnsqueezeCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::unsqueeze_copy); + } + + UnsqueezeCopy(const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + UnsqueezeCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector unsqueeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(unsqueeze_copy_out.size(), 1); + + return unsqueeze_copy_out; + + } + + + int64_t dim; + + +}; + +class UpsampleBilinear2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::upsample_bilinear2d); + } + + UpsampleBilinear2d(const torch::lazy::Value& self, const ::std::vector& output_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + UpsampleBilinear2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, align_corners, scales_h, scales_w)), + output_size(output_size), + align_corners(align_corners), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + ss << ", align_corners=" << align_corners; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); + } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& output_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == self && + this->output_size == output_size && + this->align_corners == align_corners && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("align_corners", align_corners); + arguments.emplace_back("scales_h", scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_bilinear2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_bilinear2d_out.size(), 1); + + return upsample_bilinear2d_out; + + } + + + ::std::vector output_size; + bool align_corners; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class UpsampleBilinear2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::upsample_bilinear2d_backward); + } + + UpsampleBilinear2dBackward(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + UpsampleBilinear2dBackward::ClassOpKind(), + OpList{grad_output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, input_size, align_corners, scales_h, scales_w)), + output_size(output_size), + input_size(input_size), + align_corners(align_corners), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + ss << ", input_size=" << input_size; + ss << ", align_corners=" << align_corners; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); + } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == grad_output && + this->output_size == output_size && + this->input_size == input_size && + this->align_corners == align_corners && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("input_size", input_size); + arguments.emplace_back("align_corners", align_corners); + arguments.emplace_back("scales_h", scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_bilinear2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_bilinear2d_backward_out.size(), 1); + + return upsample_bilinear2d_backward_out; + + } + + + ::std::vector output_size; + ::std::vector input_size; + bool align_corners; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class UpsampleNearest2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::upsample_nearest2d); + } + + UpsampleNearest2d(const torch::lazy::Value& self, const ::std::vector& output_size, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + UpsampleNearest2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, scales_h, scales_w)), + output_size(output_size), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); + } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& output_size, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == self && + this->output_size == output_size && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("scales_h", scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_nearest2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_nearest2d_out.size(), 1); + + return upsample_nearest2d_out; + + } + + + ::std::vector output_size; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class UpsampleNearest2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::upsample_nearest2d_backward); + } + + UpsampleNearest2dBackward(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + UpsampleNearest2dBackward::ClassOpKind(), + OpList{grad_output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, input_size, scales_h, scales_w)), + output_size(output_size), + input_size(input_size), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + ss << ", input_size=" << input_size; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); + } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == grad_output && + this->output_size == output_size && + this->input_size == input_size && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("input_size", input_size); + arguments.emplace_back("scales_h", scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_nearest2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_nearest2d_backward_out.size(), 1); + + return upsample_nearest2d_backward_out; + + } + + + ::std::vector output_size; + ::std::vector input_size; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class ViewCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::view_copy); + } + + ViewCopy(const torch::lazy::Value& self, const ::std::vector& size, std::vector&& shapes) + : TsNode( + ViewCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size)), + size(size) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size) const { + size_t i = 0; + return (operand(i++) == self && + this->size == size); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + + torch::lazy::TSOpVector view_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(view_copy_out.size(), 1); + + return view_copy_out; + + } + + + ::std::vector size; + + +}; + +class ViewCopyDtype : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::view_copy); + } + + ViewCopyDtype(const torch::lazy::Value& self, const at::ScalarType& dtype, std::vector&& shapes) + : TsNode( + ViewCopyDtype::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dtype)), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dtype=" << dtype; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const at::ScalarType& dtype) const { + size_t i = 0; + return (operand(i++) == self && + this->dtype == dtype); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dtype", dtype); + + torch::lazy::TSOpVector view_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(view_copy_out.size(), 1); + + return view_copy_out; + + } + + + at::ScalarType dtype; + + +}; + +class Zero : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::zero); + } + + Zero(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Zero::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector zero_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(zero_out.size(), 1); + + return zero_out; + + } + + + + + +}; + +} // namespace lazy +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNativeFunctions.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNativeFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..82f63eb0acc9037329361360459e52f8eda6d55c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNativeFunctions.h @@ -0,0 +1,216 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// an external backend might generate file within its code tree +// and check all the source files within the tree with clang-format. +// so, disable it since the backend might have a different config. +// clang-format off + +// Autogenerated file by gen_backend_stubs.py. Do not edit directly! + +#include + +namespace torch { +namespace lazy { + +struct LazyNativeFunctions { + +static ::std::tuple convolution_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & weight, at::OptionalIntArrayRef bias_sizes, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding, int64_t groups, ::std::array output_mask); +static ::std::tuple native_batch_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps); +static ::std::tuple native_batch_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_invstd, bool train, double eps, ::std::array output_mask); +static ::std::tuple native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +static ::std::tuple native_layer_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, at::IntArrayRef normalized_shape, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, const ::std::optional & bias, ::std::array output_mask); +static ::std::tuple svd(const at::Tensor & self, bool some, bool compute_uv); +static ::std::tuple grid_sampler_2d_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask); +static ::std::tuple log_sigmoid_forward(const at::Tensor & self); +static ::std::tuple max(const at::Tensor & self, int64_t dim, bool keepdim); +static ::std::tuple max_pool2d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); +static ::std::tuple max_pool3d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); +static ::std::tuple native_dropout(const at::Tensor & input, double p, ::std::optional train); +static ::std::tuple nll_loss2d_forward(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index); +static ::std::tuple nll_loss_forward(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index); +static ::std::tuple sort(const at::Tensor & self, int64_t dim, bool descending); +static ::std::tuple topk(const at::Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted); +static at::Tensor & arange_out(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step, at::Tensor & out); +static at::Tensor & fill_(at::Tensor & self, const at::Scalar & value); +static at::Tensor & logsumexp_out(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, at::Tensor & out); +static at::Tensor _adaptive_avg_pool2d(const at::Tensor & self, at::IntArrayRef output_size); +static at::Tensor _adaptive_avg_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self); +static at::Tensor _copy_from(const at::Tensor & self, const at::Tensor & dst, bool non_blocking); +static at::Tensor _copy_from_and_resize(const at::Tensor & self, const at::Tensor & dst); +static at::Tensor _log_softmax(const at::Tensor & self, int64_t dim, bool half_to_float); +static at::Tensor _log_softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype); +static at::Tensor _reshape_alias_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); +static at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float); +static at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype); +static at::Tensor _to_copy(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); +static at::Tensor _trilinear(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim); +static at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size); +static at::Tensor abs(const at::Tensor & self); +static at::Tensor add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); +static at::Tensor addcdiv(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value); +static at::Tensor addcmul(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value); +static at::Tensor addmm(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha); +static at::Tensor alias_copy(const at::Tensor & self); +static at::Tensor all(const at::Tensor & self); +static at::Tensor any(const at::Tensor & self); +static at::Tensor as_strided_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); +static at::Tensor as_strided_scatter_symint(const at::Tensor & self, const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); +static at::Tensor avg_pool2d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); +static at::Tensor avg_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); +static at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha); +static at::Tensor bernoulli(const at::Tensor & self, ::std::optional generator); +static at::Tensor bernoulli(const at::Tensor & self, double p, ::std::optional generator); +static at::Tensor binary_cross_entropy(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +static at::Tensor binary_cross_entropy_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +static at::Tensor bitwise_and(const at::Tensor & self, const at::Tensor & other); +static at::Tensor bitwise_or(const at::Tensor & self, const at::Tensor & other); +static at::Tensor block_diag(at::TensorList tensors); +static at::Tensor bmm(const at::Tensor & self, const at::Tensor & mat2); +static at::Tensor cat(const at::ITensorListRef & tensors, int64_t dim); +static at::Tensor clamp(const at::Tensor & self, const ::std::optional & min, const ::std::optional & max); +static at::Tensor clamp_min(const at::Tensor & self, const at::Scalar & min); +static at::Tensor clone(const at::Tensor & self, ::std::optional memory_format); +static at::Tensor constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value); +static at::Tensor convolution(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding, int64_t groups); +static at::Tensor cos(const at::Tensor & self); +static at::Tensor cumsum(const at::Tensor & self, int64_t dim, ::std::optional dtype); +static at::Tensor detach_copy(const at::Tensor & self); +static at::Tensor diag_embed(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor diagonal_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor diagonal_copy(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor diagonal_scatter(const at::Tensor & self, const at::Tensor & src, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor div(const at::Tensor & self, const at::Tensor & other); +static at::Tensor div(const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); +static at::Tensor elu(const at::Tensor & self, const at::Scalar & alpha, const at::Scalar & scale, const at::Scalar & input_scale); +static at::Tensor elu_backward(const at::Tensor & grad_output, const at::Scalar & alpha, const at::Scalar & scale, const at::Scalar & input_scale, bool is_result, const at::Tensor & self_or_result); +static at::Tensor embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse); +static at::Tensor embedding_dense_backward(const at::Tensor & grad_output, const at::Tensor & indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq); +static at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +static at::Tensor empty_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); +static at::Tensor eq(const at::Tensor & self, const at::Scalar & other); +static at::Tensor eq(const at::Tensor & self, const at::Tensor & other); +static at::Tensor exp(const at::Tensor & self); +static at::Tensor expand_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit); +static at::Tensor flip(const at::Tensor & self, at::IntArrayRef dims); +static at::Tensor floor(const at::Tensor & self); +static at::Tensor frac(const at::Tensor & self); +static at::Tensor gather(const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad); +static at::Tensor ge(const at::Tensor & self, const at::Scalar & other); +static at::Tensor ge(const at::Tensor & self, const at::Tensor & other); +static at::Tensor gelu(const at::Tensor & self, c10::string_view approximate); +static at::Tensor gelu_backward(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate); +static at::Tensor glu(const at::Tensor & self, int64_t dim); +static at::Tensor glu_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim); +static at::Tensor glu_jvp(const at::Tensor & glu, const at::Tensor & x, const at::Tensor & dx, int64_t dim); +static at::Tensor grid_sampler_2d(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +static at::Tensor gt(const at::Tensor & self, const at::Scalar & other); +static at::Tensor gt(const at::Tensor & self, const at::Tensor & other); +static at::Tensor hardsigmoid(const at::Tensor & self); +static at::Tensor index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index); +static at::Tensor le(const at::Tensor & self, const at::Scalar & other); +static at::Tensor le(const at::Tensor & self, const at::Tensor & other); +static at::Tensor leaky_relu(const at::Tensor & self, const at::Scalar & negative_slope); +static at::Tensor leaky_relu_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & negative_slope, bool self_is_result); +static at::Tensor lift(const at::Tensor & self); +static at::Tensor lift_fresh(const at::Tensor & self); +static at::Tensor linalg_pinv(const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); +static at::Tensor log(const at::Tensor & self); +static at::Tensor log2(const at::Tensor & self); +static at::Tensor log_sigmoid_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer); +static at::Tensor logdet(const at::Tensor & self); +static at::Tensor lt(const at::Tensor & self, const at::Scalar & other); +static at::Tensor lt(const at::Tensor & self, const at::Tensor & other); +static at::Tensor masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Scalar & value); +static at::Tensor masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Tensor & value); +static at::Tensor max(const at::Tensor & self); +static at::Tensor max_pool2d_with_indices_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices); +static at::Tensor max_pool3d_with_indices_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices); +static at::Tensor maximum(const at::Tensor & self, const at::Tensor & other); +static at::Tensor mean(const at::Tensor & self, ::std::optional dtype); +static at::Tensor mean(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +static at::Tensor min(const at::Tensor & self); +static at::Tensor minimum(const at::Tensor & self, const at::Tensor & other); +static at::Tensor mm(const at::Tensor & self, const at::Tensor & mat2); +static at::Tensor mul(const at::Tensor & self, const at::Tensor & other); +static at::Tensor mv(const at::Tensor & self, const at::Tensor & vec); +static at::Tensor narrow_copy_symint(const at::Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length); +static at::Tensor native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale); +static at::Tensor ne(const at::Tensor & self, const at::Scalar & other); +static at::Tensor ne(const at::Tensor & self, const at::Tensor & other); +static at::Tensor neg(const at::Tensor & self); +static at::Tensor new_empty_strided_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +static at::Tensor nll_loss2d_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +static at::Tensor nll_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +static at::Tensor nonzero(const at::Tensor & self); +static at::Tensor norm(const at::Tensor & self, const ::std::optional & p, at::IntArrayRef dim, bool keepdim); +static at::Tensor normal_functional(const at::Tensor & self, double mean, double std, ::std::optional generator); +static at::Tensor permute_copy(const at::Tensor & self, at::IntArrayRef dims); +static at::Tensor pixel_shuffle(const at::Tensor & self, int64_t upscale_factor); +static at::Tensor pixel_unshuffle(const at::Tensor & self, int64_t downscale_factor); +static at::Tensor pow(const at::Tensor & self, const at::Scalar & exponent); +static at::Tensor pow(const at::Tensor & self, const at::Tensor & exponent); +static at::Tensor random(const at::Tensor & self, ::std::optional generator); +static at::Tensor random(const at::Tensor & self, int64_t from, ::std::optional to, ::std::optional generator); +static at::Tensor random(const at::Tensor & self, int64_t to, ::std::optional generator); +static at::Tensor reciprocal(const at::Tensor & self); +static at::Tensor relu(const at::Tensor & self); +static at::Tensor remainder(const at::Tensor & self, const at::Tensor & other); +static at::Tensor repeat(const at::Tensor & self, at::IntArrayRef repeats); +static at::Tensor rsqrt(const at::Tensor & self); +static at::Tensor scatter_add(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & src); +static at::Tensor select_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt index); +static at::Tensor select_copy(const at::Tensor & self, int64_t dim, int64_t index); +static at::Tensor select_scatter(const at::Tensor & self, const at::Tensor & src, int64_t dim, int64_t index); +static at::Tensor sgn(const at::Tensor & self); +static at::Tensor sigmoid(const at::Tensor & self); +static at::Tensor sigmoid_backward(const at::Tensor & grad_output, const at::Tensor & output); +static at::Tensor silu(const at::Tensor & self); +static at::Tensor slice_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt start, c10::SymInt end, c10::SymInt step); +static at::Tensor slice_copy_symint(const at::Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +static at::Tensor slice_scatter_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +static at::Tensor smooth_l1_loss(const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta); +static at::Tensor smooth_l1_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta); +static at::Tensor softplus(const at::Tensor & self, const at::Scalar & beta, const at::Scalar & threshold); +static at::Tensor softplus_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & beta, const at::Scalar & threshold); +static at::Tensor sqrt(const at::Tensor & self); +static at::Tensor squeeze_copy(const at::Tensor & self); +static at::Tensor squeeze_copy(const at::Tensor & self, at::IntArrayRef dim); +static at::Tensor squeeze_copy(const at::Tensor & self, int64_t dim); +static at::Tensor stack(at::TensorList tensors, int64_t dim); +static at::Tensor std(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim); +static at::Tensor std(const at::Tensor & self, at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); +static at::Tensor std(const at::Tensor & self, bool unbiased); +static at::Tensor sub(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); +static at::Tensor sum(const at::Tensor & self, ::std::optional dtype); +static at::Tensor sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +static at::Tensor t_copy(const at::Tensor & self); +static at::Tensor tanh(const at::Tensor & self); +static at::Tensor tanh_backward(const at::Tensor & grad_output, const at::Tensor & output); +static at::Tensor threshold(const at::Tensor & self, const at::Scalar & threshold, const at::Scalar & value); +static at::Tensor threshold_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & threshold); +static at::Tensor trace(const at::Tensor & self); +static at::Tensor transpose_copy(const at::Tensor & self, int64_t dim0, int64_t dim1); +static at::Tensor tril(const at::Tensor & self, int64_t diagonal); +static at::Tensor triu(const at::Tensor & self, int64_t diagonal); +static at::Tensor trunc(const at::Tensor & self); +static at::Tensor unfold_copy(const at::Tensor & self, int64_t dimension, int64_t size, int64_t step); +static at::Tensor uniform(const at::Tensor & self, double from, double to, ::std::optional generator); +static at::Tensor unsqueeze_copy(const at::Tensor & self, int64_t dim); +static at::Tensor upsample_bilinear2d(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor upsample_bilinear2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor upsample_nearest2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor upsample_nearest2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor view_copy(const at::Tensor & self, at::ScalarType dtype); +static at::Tensor view_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size); +static at::Tensor zero(const at::Tensor & self); +static ::std::tuple native_group_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps); +static at::Tensor max_pool3d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); + +}; +} // namespace lazy +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNonNativeIr.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNonNativeIr.h new file mode 100644 index 0000000000000000000000000000000000000000..4cea78933ef5fb0fe8fb8dea61f56825200d92bc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNonNativeIr.h @@ -0,0 +1,160 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +// This file contains autogenerated LazyTensor Non Native IR nodes + +namespace torch { +namespace lazy { + +class Scalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::prim::Constant); + } + + Scalar(const at::Scalar& value, const at::ScalarType& type) + : TsNode( + Scalar::ClassOpKind(), + OpList{}, + compute_shape_scalar(value, type), + /* num_outputs */ 1, + torch::lazy::MHash(value, type)), + value(value), + type(type) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", value=" << value; + ss << ", type=" << type; + return ss.str(); + } + + + + bool CanBeReused(const at::Scalar& value, const at::ScalarType& type) const; + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override; + + at::Scalar value; + at::ScalarType type; + + +}; + +class Expand : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::expand); + } + + Expand(const torch::lazy::Value& input, const ::std::vector& size, const bool& is_scalar_expand) + : TsNode( + Expand::ClassOpKind(), + OpList{input}, + [&](){ return compute_shape_expand(operand(0), size, is_scalar_expand)[0]; }, + /* num_outputs */ 1, + torch::lazy::MHash(size, is_scalar_expand)), + size(size), + is_scalar_expand(is_scalar_expand) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", is_scalar_expand=" << is_scalar_expand; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const ::std::vector& size, const bool& is_scalar_expand) const { + size_t i = 0; + return (operand(i++) == input && + this->size == size && + this->is_scalar_expand == is_scalar_expand); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override; + + ::std::vector size; + bool is_scalar_expand; + + +}; + +class Cast : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(ltc_cast); + } + + Cast(const torch::lazy::Value& input, const at::ScalarType& dtype, const ::std::optional& stype) + : TsNode( + Cast::ClassOpKind(), + OpList{input}, + compute_shape_cast(input, dtype, stype), + /* num_outputs */ 1, + torch::lazy::MHash(dtype, stype)), + dtype(dtype), + stype(stype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dtype=" << dtype; + if (stype.has_value()) { + ss << ", stype=" << stype.value(); + } else { + ss << ", stype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const at::ScalarType& dtype, const ::std::optional& stype) const { + size_t i = 0; + return (operand(i++) == input && + this->dtype == dtype && + ((!this->stype&&!stype) || (this->stype&&stype && *(this->stype) == *stype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override; + + at::ScalarType dtype; + ::std::optional stype; + + +}; + +} // namespace lazy +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/init.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/init.h new file mode 100644 index 0000000000000000000000000000000000000000..56e5f624fcff53187e799126003008a0d2874429 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/init.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +namespace torch::lazy { + +TORCH_PYTHON_API void initLazyBindings(PyObject* module); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/python_util.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/python_util.h new file mode 100644 index 0000000000000000000000000000000000000000..dc5777bb0f45af1f31d11fc08adb7f873f7bfe46 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/python_util.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include + +namespace torch::lazy { + +std::optional TORCH_PYTHON_API GetPythonFrameTop(); + +std::vector TORCH_PYTHON_API GetPythonFrames(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/config.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/config.h new file mode 100644 index 0000000000000000000000000000000000000000..0157b30fc7817ed9a74cca3ceb769db3a31b320b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/config.h @@ -0,0 +1,12 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +// TODO(whc) unclear if this is useful, has only been tested as true +TORCH_DECLARE_bool(torch_lazy_ts_tensor_update_sync); + +TORCH_DECLARE_bool(torch_lazy_ts_cuda); + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/dynamic_ir.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/dynamic_ir.h new file mode 100644 index 0000000000000000000000000000000000000000..4c42b0831100df2c145d7d5ddc0933f7c2085460 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/dynamic_ir.h @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +TORCH_DECLARE_bool(ltc_enable_dynamic_shapes); + +namespace torch::lazy { + +/** + * The goal of "dynamic" Nodes is to patch a hole in our tracing. + * Previously, if a user called `sizes` on a Tensor, it would leak out + * of our tracing system, as `sizes` returns a torch.Size or an int. To + * prevent this from happening, we introduce DimensionNode, a new type + * of Node that abstracts the operation of getting the dimensions of a + * Tensor. + * + * Consider the following example: + * ``` + * numel = x.shape()[0] * x.shape()[1] + * ``` + * + * Here, `x.shape()[i]` will be a SizeNode (subclass of DimensionNode), + * and the multiplication of the two SizeNodes will be represented by + * a SizeMul (also a subclass of DimensionNode). Through this, we can + * prevent `numel` from being represented as a Python int and thus + * burned into the Graph. + */ + +// Represents the result of calling `size` on a Tensor +class TORCH_API SizeNode : public TsNode, public DimensionNode { + public: + SizeNode(Value input, size_t dim); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; + size_t dim_ = 0; + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const override; +}; + +class TORCH_API SizeAdd : public TsNode, public DimensionNode { + public: + SizeAdd(Value a, Value b); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; +}; + +class TORCH_API SizeMul : public TsNode, public DimensionNode { + public: + SizeMul(Value a, Value b); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; +}; + +class TORCH_API SizeDiv : public TsNode, public DimensionNode { + public: + SizeDiv(Value a, Value b); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ir_builder.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ir_builder.h new file mode 100644 index 0000000000000000000000000000000000000000..7d8dd8c804cc68910334fc737043c58e90cc4ea0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ir_builder.h @@ -0,0 +1,74 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +struct TorchScriptIrBuilder : IrBuilder { + NodePtr MakeDeviceData( + const std::shared_ptr& data) const override { + return DeviceData::Create(data); + } + // TODO: Scalar node is not currently used by ts_backend. Enable reusing + // Scalar node later if needed. + NodePtr MakeScalar(const at::Scalar& value, const at::ScalarType& type) + const override { + return MakeNode(value, type); + } + NodePtr MakeExpand( + const Value& input0, + const std::vector& size, + const bool& is_scalar_expand) const override { + return ReuseOrMakeNode(input0, size, is_scalar_expand); + } + NodePtr MakeCast( + const Value& input0, + const at::ScalarType& dtype, + const std::optional& stype = + std::nullopt) const override { + return ReuseOrMakeNode(input0, dtype, stype); + } + NodePtr MakeTensorList(const OpList& inputs) const override { + return ReuseOrMakeNode(inputs); + } + // Generic needs cleanup + NodePtr MakeGeneric( + const OpKind& op, + const OpList& operands, + const Shape& shape, + const size_t& num_outputs = 1, + const hash_t& hash_seed = + static_cast(0x5a2d296e9)) const override { + return MakeNode(op, operands, shape, num_outputs, hash_seed); + } + + // dynamic ir nodes + // TODO: verify if IR node reusing works for Dynamic shape ops + NodePtr MakeSizeNode(const Value& input, size_t dim) const override { + return MakeNode(input, dim); + } + NodePtr MakeSizeAdd(const Value& a, const Value& b) const override { + return MakeNode(a, b); + } + NodePtr MakeSizeMul(const Value& a, const Value& b) const override { + return MakeNode(a, b); + } + NodePtr MakeSizeDiv(const Value& a, const Value& b) const override { + return MakeNode(a, b); + } +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/device_data.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/device_data.h new file mode 100644 index 0000000000000000000000000000000000000000..f258cf99c580b129f640070fd14839230a6f881f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/device_data.h @@ -0,0 +1,55 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +class TORCH_API DeviceData : public TsNode { + public: + static OpKind ClassOpKind() { + return ltc_device_data; + } + + explicit DeviceData(std::shared_ptr data); + + // A DeviceData node can be reused if the shape matches, + // but we will substitute the actual data_ pointer under + // the hood. + bool CanBeReused(const std::shared_ptr& data) const { + return data_->shape() == data->shape(); + } + + std::string ToString() const override; + + const std::shared_ptr& data() const { + return data_; + } + + void SetData(std::shared_ptr data) { + data_ = std::move(data); + } + + static const DeviceData* Cast(const Node* node); + + // To reuse IR nodes, use this method to create DeviceData nodes + // instead of calling the constructor directconst ly. + static NodePtr Create(const std::shared_ptr& data); + + TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const override; + + private: + std::shared_ptr data_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/generic.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/generic.h new file mode 100644 index 0000000000000000000000000000000000000000..8334391a593aa80e966f057b94eb47b43834c03e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/generic.h @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::lazy { + +// Generic IR Node implementation for nodes which can simply be described by a +// specific OpKind and a lowering function. IR nodes carrying +// metadata should not be using this class TORCH_API (and have the metadata +// captured by the LowerFn), but they should instead create a dedicated IR node. +// Doing the former would limit IR introspection. +class TORCH_API Generic : public TsNode { + public: + Generic( + OpKind op, + OpList operands, + Shape shape, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)); + + Generic( + OpKind op, + OpList operands, + const std::function& shape_fn, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)); + + Generic( + OpKind op, + OpList operands, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)); + + Generic(OpKind op, Shape shape, size_t num_outputs, hash_t hash_seed); + + private: + hash_t hash_seed_; +}; + +inline NodePtr GenericOp( + OpKind op, + OpList operands, + Shape shape, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)) { + return MakeNode( + op, operands, std::move(shape), num_outputs, hash_seed); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/to_copy.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/to_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..121cd0ffcbc99e5680c16e312013059d6dd5e74c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/to_copy.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +// This IR was copied from code-generated output, but the entire _to_copy +// operator cannot be trivially code generated since it is only desirable to +// capture IR for certain permutations of _to_copy (e.g. dtype), and for the +// others it is difficult to even invoke the aten/eager fallback necessitating +// directly implementing the right to(device) behavior +class ToCopy : public torch::lazy::TsNode { + public: + static OpKind ClassOpKind() { + return OpKind(at::aten::_to_copy); + } + + ToCopy( + const torch::lazy::Value& self, + const std::optional& dtype, + const std::optional& layout, + const std::optional& device, + const std::optional& pin_memory, + const bool& non_blocking, + const std::optional& memory_format, + std::vector&& shapes) + : torch::lazy::TsNode( + ClassOpKind(), + {self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash( + dtype, + layout, + device, + pin_memory, + non_blocking, + memory_format)), + + dtype(dtype), + layout(layout), + device(device), + pin_memory(pin_memory), + non_blocking(non_blocking), + memory_format(memory_format) {} + + bool CanBeReused( + const torch::lazy::Value& self, + const std::optional& dtype, + const std::optional& layout, + const std::optional& device, + const std::optional& pin_memory, + const bool& non_blocking, + const std::optional& memory_format) const { + size_t i = 0; + return ( + operand(i++) == self && this->dtype == dtype && + this->layout == layout && this->device == device && + this->pin_memory == pin_memory && this->non_blocking == non_blocking && + this->memory_format == memory_format); + } + + std::string ToString() const override { + std::stringstream ss; + ss << torch::lazy::TsNode::ToString(); + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + if (layout.has_value()) { + ss << ", layout=" << layout.value(); + } else { + ss << ", layout=null"; + } + if (device.has_value()) { + ss << ", device=" << device.value(); + } else { + ss << ", device=null"; + } + if (pin_memory.has_value()) { + ss << ", pin_memory=" << pin_memory.value(); + } else { + ss << ", pin_memory=null"; + } + ss << ", non_blocking=" << non_blocking; + if (memory_format.has_value()) { + ss << ", memory_format=" << memory_format.value(); + } else { + ss << ", memory_format=null"; + } + return ss.str(); + } + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(6); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("dtype", dtype); + kwarguments.emplace_back("layout", layout); + kwarguments.emplace_back("device", device); + kwarguments.emplace_back("pin_memory", pin_memory); + kwarguments.emplace_back("non_blocking", non_blocking); + kwarguments.emplace_back("memory_format", memory_format); + torch::lazy::TSOpVector _to_copy_out = + torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_to_copy_out.size(), 1); + + return _to_copy_out; + } + + std::optional dtype; + std::optional layout; + std::optional device; + std::optional pin_memory; + bool non_blocking; + std::optional memory_format; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/tensor_aten_ops.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/tensor_aten_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b6d42fb70a08f6942e1a9c89ea387b0221878cb2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/tensor_aten_ops.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +////////////////////////////////////////////////////////////////////////////// +// ATEN operators follows here, listed in alphabetical order. +////////////////////////////////////////////////////////////////////////////// + +void copy_(torch::lazy::LazyTensorPtr& input, torch::lazy::LazyTensorPtr& src); +// Fills the input with the given value. +void fill_(torch::lazy::LazyTensorPtr& input, const at::Scalar& value); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_autograd_functions.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_autograd_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..3d7ba8436e4923dac4fe138a1a90cb62b084b546 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_autograd_functions.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +struct MaxPool3dAutogradFunctionTS + : public torch::autograd::Function { + static at::Tensor forward( + torch::autograd::AutogradContext* ctx, + const at::Tensor& self, + at::IntArrayRef kernel_size, + at::IntArrayRef stride, + at::IntArrayRef padding, + at::IntArrayRef dilation, + bool ceil_mode); + static torch::autograd::variable_list backward( + torch::autograd::AutogradContext* ctx, + torch::autograd::variable_list grad_output); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_backend_impl.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_backend_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..d00d8f1812545994e00b2137df9cdc11c1cf20e8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_backend_impl.h @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::lazy { + +class TORCH_API TSData : public torch::lazy::BackendData { + public: + TSData(const at::Scalar& scalar, const torch::lazy::BackendDevice& device) + : torch::lazy::BackendData(device, torch::lazy::Shape(scalar.type(), {})), + scalar(scalar) {} + + TSData( + at::Tensor data, + const torch::lazy::Shape& shape, + const torch::lazy::BackendDevice& device) + : torch::lazy::BackendData(device, shape), data_(std::move(data)) {} + + TSData( + const torch::lazy::Shape& shape, + const torch::lazy::BackendDevice& device) + : torch::lazy::BackendData(device, shape) {} + + Handle GetHandle() override { + return reinterpret_cast(this); + } + + void Assign(const torch::lazy::BackendData& data) override { + data_ = static_cast(data).data_; + } + + bool HasValue() const override { + return data_.defined(); + } + + at::Tensor data() { + return data_; + } + + std::optional scalar; + + private: + at::Tensor data_; +}; + +TORCH_API torch::lazy::BackendImplInterface* GetTSBackendImpl(); + +TORCH_PYTHON_API void InitTorchScriptBackend(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_eager_fallback.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_eager_fallback.h new file mode 100644 index 0000000000000000000000000000000000000000..3cbf6f8a37d864acfe1f2be569a1b7cc436801fc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_eager_fallback.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::lazy { + +bool force_eager_fallback(c10::Symbol op); +void ltc_eager_fallback( + const c10::OperatorHandle& op, + torch::jit::Stack* stack); + +void ts_eager_fallback( + const c10::OperatorHandle& op, + torch::jit::Stack* stack, + c10::DeviceType device_type); + +// The TorchScript backend does not register itself with pytorch dispatcher +// until it is explicitly initialized. This function should only be called +// by the main Torchscript backend init function. +void register_ts_ltc_eager_fallback(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_lowering_context.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_lowering_context.h new file mode 100644 index 0000000000000000000000000000000000000000..3ab1b3191135cd0ef213962515cc264459f9f28b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_lowering_context.h @@ -0,0 +1,156 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +using TSOpVector = std::vector; + +class TORCH_API TSComputation : public Computation { + public: + TSComputation(const std::shared_ptr& graph) + : graph_(graph), graph_executor_(graph, "") { + for (torch::jit::Value* input : graph_->inputs()) { + parameter_names_.push_back(input->debugName()); + } + } + + int parameters_size() const override { + return static_cast(parameter_names_.size()); + } + + const std::vector& parameter_shapes() const override { + TORCH_CHECK( + false, "TODO(whc) implement TS computation shapes or change interface"); + return parameter_shapes_; + } + + const std::vector& parameter_names() const override { + return parameter_names_; + } + + const Shape& result_shape() const override { + TORCH_CHECK( + false, "TODO(whc) implement TS computation shapes or change interface"); + return result_shape_; + } + + const std::string to_string() const override { + std::ostringstream oss; + oss << *graph_; + return oss.str(); + } + + std::shared_ptr graph() const { + return graph_; + } + + torch::jit::GraphExecutor& graph_executor() { + return graph_executor_; + } + + private: + std::shared_ptr graph_; + torch::jit::GraphExecutor graph_executor_; + std::vector parameter_names_; + std::vector parameter_shapes_; + Shape result_shape_; +}; + +class TORCH_API TSLoweringContext : public LoweringContext { + public: + TSLoweringContext(const std::string& name, const BackendDevice device); + + TSLoweringContext( + const std::string& name, + BackendDevice device, + c10::ArrayRef post_order, + Util::EmissionMap emit_status); + + size_t AddResult(const Output& output) override { + return AddResult(GetOutputOp(output)); + } + + void AddParameter( + const torch::lazy::Output& output, + size_t index, + const Shape& shape, + const std::string& name) override { + TORCH_INTERNAL_ASSERT(false, "not implemented"); + } + + void Lower(const Node* node); + + ComputationPtr Build() override { + for (torch::jit::Value* output : root_tuple_) { + graph_->block()->registerOutput(output); + } + return std::make_shared(graph_); + } + + // Retrieves the lowered operation for an output. If the requested output is + // not available yet, the graph behind the output's Node is lowered, and the + // corresponding TS operation returned. + torch::jit::Value* GetOutputOp(const Output& output) { + auto it = emitted_outputs_.find(output); + if (it == emitted_outputs_.end()) { + auto post_order = Util::ComputePostOrder(output.node, &emit_status_); + for (auto node : post_order) { + Lower(node); + } + // At this point the output better be present, otherwise there is an issue + // with the lowering code. + it = emitted_outputs_.find(output); + TORCH_CHECK( + it != emitted_outputs_.end(), + "No TS operation emitted for output: ", + output.ToString()); + } + return it->second; + } + + // Assigns the given TS operation to the specified output. As outputs are + // lowered in a post-order fashion, later nodes should always find their + // operands among the emitted outputs. + void AssignOutputOp(const Output& output, torch::jit::Value* op); + + // If a parameter associated with data has already been declared, it will be + // returned. Otherwise a new one will be created, associated with the tensor + // held in data. + torch::jit::Value* GetParameter(const BackendDataPtr& data); + + std::shared_ptr graph() const { + return graph_; + } + + private: + struct Parameter { + torch::jit::Value* param{nullptr}; + size_t index = 0; + }; + + size_t AddResult(torch::jit::Value* op) { + root_tuple_.push_back(op); + return root_tuple_.size() - 1; + } + + std::shared_ptr graph_; + std::shared_ptr function_; + std::unordered_map parameters_map_; + std::vector root_tuple_; + OutputMap emitted_outputs_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node.h new file mode 100644 index 0000000000000000000000000000000000000000..5efd7eed90acd7f260b9f9a64fd86819cc9fb6c3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node.h @@ -0,0 +1,109 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +using TSOpVector = std::vector; + +class TORCH_API TsNode : public lazy::Node { + public: + TsNode( + OpKind op, + OpList operands, + std::vector&& shapes, + size_t num_outputs, + hash_t hash_seed = kHashSeed); + + TsNode( + OpKind op, + OpList operands, + const std::function& shape_fn, + size_t num_outputs, + hash_t hash_seed = kHashSeed); + + TsNode( + OpKind op, + OpList operands, + size_t num_outputs, + hash_t hash_seed = kHashSeed); + + TsNode( + OpKind op, + Shape shape, + size_t num_outputs, + hash_t hash_seed = kHashSeed); + + ~TsNode() override = default; + + hash_t hash() const override; + + hash_t shapeHash() const override; + + const std::string getPythonStacktrace() const; + + // Lower is a backend-specific method since it returns a backend specific + // type. hence, it is convenient to define it differently per-backend rather + // than at Node API + virtual TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const; + + private: + // The hash of the dag WITH size info. Used for shape caching + hash_t shape_hash_; + // The hash of the dag used to look up the compiled graph by a hash + // in this case, we will use the dag hash WITHOUT size info if dynamic shape + // is enabled and use the dag hash WITH size info otherwise. + hash_t dag_hash_; +}; + +// Note: this OpKind is separate from ltc_ops.h since it would be a circular +// import otherwise, I like leaving TensorList in this file, and I think most of +// ltc_ops special cases will be deleted anyway +const OpKind tensor_list_opkind = OpKind::Get("lazy_tensors::tensor_list"); + +// TensorList represents an at::TensorList which is a vector[Tensor] but is also +// a first-class IValue and can be fed as a single input to a TS program. It is +// much easier to handle TensorLists in Lazy Tensor code if they are represented +// as a single Node so there can be more than one TensorList and more than one +// Tensor side-by-side as operands to an op. +// +// Note: shape is undefined for TensorList. We assert in some places that +// #shapes matches #outputs and this stems from +// the fact that currently all IR nodes represent tensors (there is no +// type system for this IR). Because of this, TensorList is a bit of a +// hack. +// +// TODO(whc) once Shape() API is moved to Node base, also make it virtual, and +// then implement it as NotImplemented for TensorList, also fixing the assertion +// that would fail. +struct TORCH_API TensorList : public TsNode { + static OpKind ClassOpKind() { + return tensor_list_opkind; + } + + TensorList() = delete; + TensorList(OpList values); + + bool CanBeReused(OpList values) const { + return operands() == std::vector(values.begin(), values.end()); + } + + TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const override; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node_lowering.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node_lowering.h new file mode 100644 index 0000000000000000000000000000000000000000..37a11e964bb5e8e4eeaff374b8a5b7792c3502b0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node_lowering.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::lazy { +using TSOpVector = std::vector; + +TORCH_API TSOpVector LowerTSBuiltin( + const std::shared_ptr& function, + c10::Symbol sym, + const std::vector& arguments, + const std::vector& kwarguments = {}); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h new file mode 100644 index 0000000000000000000000000000000000000000..b50d495618218335dd644f3da308eb59f28a9eac --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace torch::mtia { +using namespace torch::profiler::impl::python_tracer; + +void initMemoryProfiler(); + +std::unique_ptr getMemoryTracer(); + +class MTIAMemoryProfiler final : public PythonMemoryTracerBase { + public: + explicit MTIAMemoryProfiler() = default; + ~MTIAMemoryProfiler() override = default; + void start() override; + void stop() override; + void export_memory_history(const std::string& path) override; +}; + +} // namespace torch::mtia + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)